Search in sources :

Example 16 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SolrDocumentFetcher method decorateDocValueFields.

/**
   * Reads the docValues of the requested fields for a single document and adds them to the given
   * SolrDocument/SolrInputDocument.
   * <p>
   * A field is skipped when it is unknown to the schema, has no docValues, is already present in
   * {@code doc}, is unknown to the searcher's field infos, or has no value for this document.
   *
   * @param doc
   *          A SolrDocument or SolrInputDocument instance where docValues will be added
   * @param docid
   *          The lucene docid of the document to be populated
   * @param fields
   *          The list of docValues fields to be decorated
   * @throws IOException
   *           if reading the docValues from the index fails
   */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException {
    // Resolve the top-level docid to the segment that holds it and the segment-local id.
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();
    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            // Searcher doesn't have info about this field, hence ignore it.
            continue;
        }
        final DocValuesType dvType = fi.getDocValuesType();
        // NOTE: every `continue` inside this switch skips to the next field of the enclosing loop.
        switch(dvType) {
            case NUMERIC:
                final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
                if (ndv == null || !ndv.advanceExact(localId)) {
                    // no numeric docValues in this segment, or no value for this document
                    continue;
                }
                final long val = ndv.longValue();
                // Default: expose the raw long; narrowed/decoded below depending on the field type.
                Object newVal = val;
                if (schemaField.getType().isPointField()) {
                    // TODO: Maybe merge PointField with TrieFields here
                    NumberType pointType = schemaField.getType().getNumberType();
                    switch(pointType) {
                        case INTEGER:
                            newVal = (int) val;
                            break;
                        case LONG:
                            newVal = val;
                            break;
                        case FLOAT:
                            newVal = Float.intBitsToFloat((int) val);
                            break;
                        case DOUBLE:
                            newVal = Double.longBitsToDouble(val);
                            break;
                        case DATE:
                            newVal = new Date(val);
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + pointType);
                    }
                } else {
                    if (schemaField.getType() instanceof TrieIntField) {
                        newVal = (int) val;
                    } else if (schemaField.getType() instanceof TrieFloatField) {
                        newVal = Float.intBitsToFloat((int) val);
                    } else if (schemaField.getType() instanceof TrieDoubleField) {
                        newVal = Double.longBitsToDouble(val);
                    } else if (schemaField.getType() instanceof TrieDateField) {
                        newVal = new Date(val);
                    } else if (schemaField.getType() instanceof EnumField) {
                        newVal = ((EnumField) schemaField.getType()).intValueToStringValue((int) val);
                    }
                }
                doc.addField(fieldName, newVal);
                break;
            case BINARY:
                BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
                if (bdv == null || !bdv.advanceExact(localId)) {
                    continue;
                }
                // Deep copy: the BytesRef handed out by the iterator is copied before it is stored in the doc.
                doc.addField(fieldName, BytesRef.deepCopyOf(bdv.binaryValue()));
                break;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
                if (sdv == null) {
                    continue;
                }
                if (sdv.advanceExact(localId)) {
                    final BytesRef bRef = sdv.binaryValue();
                    // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                    if (schemaField.getType() instanceof BoolField) {
                        doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                    } else {
                        doc.addField(fieldName, bRef.utf8ToString());
                    }
                }
                break;
            case SORTED_NUMERIC:
                final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
                NumberType type = schemaField.getType().getNumberType();
                if (numericDv != null) {
                    if (numericDv.advance(localId) == localId) {
                        final List<Object> outValues = new ArrayList<>(numericDv.docValueCount());
                        for (int i = 0; i < numericDv.docValueCount(); i++) {
                            long number = numericDv.nextValue();
                            switch(type) {
                                case INTEGER:
                                    outValues.add((int) number);
                                    break;
                                case LONG:
                                    outValues.add(number);
                                    break;
                                case FLOAT:
                                    outValues.add(NumericUtils.sortableIntToFloat((int) number));
                                    break;
                                case DOUBLE:
                                    outValues.add(NumericUtils.sortableLongToDouble(number));
                                    break;
                                case DATE:
                                    outValues.add(new Date(number));
                                    break;
                                default:
                                    throw new AssertionError("Unexpected PointType: " + type);
                            }
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                // Stop here: without this break control fell through into the SORTED_SET branch.
                break;
            case SORTED_SET:
                final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
                if (values != null && values.getValueCount() > 0) {
                    if (values.advance(localId) == localId) {
                        final List<Object> outValues = new LinkedList<>();
                        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
                            final BytesRef term = values.lookupOrd(ord);
                            outValues.add(schemaField.getType().toObject(schemaField, term));
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break;
            case NONE:
                break;
        }
    }
}
Also used : NumericDocValues(org.apache.lucene.index.NumericDocValues) SortedNumericDocValues(org.apache.lucene.index.SortedNumericDocValues) TrieIntField(org.apache.solr.schema.TrieIntField) ArrayList(java.util.ArrayList) TrieDateField(org.apache.solr.schema.TrieDateField) BinaryDocValues(org.apache.lucene.index.BinaryDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocValuesType(org.apache.lucene.index.DocValuesType) TrieFloatField(org.apache.solr.schema.TrieFloatField) BytesRef(org.apache.lucene.util.BytesRef) TrieDoubleField(org.apache.solr.schema.TrieDoubleField) EnumField(org.apache.solr.schema.EnumField) BoolField(org.apache.solr.schema.BoolField) LeafReader(org.apache.lucene.index.LeafReader) Date(java.util.Date) SortedDocValues(org.apache.lucene.index.SortedDocValues) LinkedList(java.util.LinkedList) SchemaField(org.apache.solr.schema.SchemaField) NumberType(org.apache.solr.schema.NumberType) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 17 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class SolrDocumentFetcher method visitFromCached.

/**
 * Replays every field of a document taken from the document cache through the given stored
 * field visitor, dispatching on the kind of value the field carries (binary, numeric, string).
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
    for (IndexableField field : document) {
        final FieldInfo fieldInfo = searcher.getFieldInfos().fieldInfo(field.name());
        final StoredFieldVisitor.Status status = visitor.needsField(fieldInfo);
        if (status == StoredFieldVisitor.Status.STOP) {
            // the visitor has seen everything it wants
            return;
        }
        if (status == StoredFieldVisitor.Status.NO) {
            continue;
        }

        final BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            visitor.binaryField(fieldInfo, toByteArrayUnwrapIfPossible(bytes));
            continue;
        }

        final Number number = field.numericValue();
        if (number == null) {
            // neither binary nor numeric: must be String
            if (field instanceof LargeLazyField) {
                // optimization to avoid premature string conversion
                final LargeLazyField lazyField = (LargeLazyField) field;
                visitor.stringField(fieldInfo, toByteArrayUnwrapIfPossible(lazyField.readBytes()));
            } else {
                visitor.stringField(fieldInfo, field.stringValue().getBytes(StandardCharsets.UTF_8));
            }
            continue;
        }

        // The four supported boxed types are mutually exclusive, so the test order is irrelevant.
        if (number instanceof Integer) {
            visitor.intField(fieldInfo, number.intValue());
        } else if (number instanceof Long) {
            visitor.longField(fieldInfo, number.longValue());
        } else if (number instanceof Float) {
            visitor.floatField(fieldInfo, number.floatValue());
        } else if (number instanceof Double) {
            visitor.doubleField(fieldInfo, number.doubleValue());
        } else {
            throw new AssertionError();
        }
    }
}
Also used : IndexableField(org.apache.lucene.index.IndexableField) StoredFieldVisitor(org.apache.lucene.index.StoredFieldVisitor) DocumentStoredFieldVisitor(org.apache.lucene.document.DocumentStoredFieldVisitor) FieldInfo(org.apache.lucene.index.FieldInfo) BytesRef(org.apache.lucene.util.BytesRef)

Example 18 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.

the class Geo3DPointDistanceComparator method getLeafComparator.

/**
 * Prepares this comparator for the given segment: if the segment knows the sort field, its
 * {@link FieldInfo} is handed to {@code Geo3DDocValuesField.checkCompatible}; then the
 * segment's sorted-numeric doc values for the field are stored in {@code currentDocs}.
 *
 * @return this comparator, which also serves as the per-leaf comparator
 */
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
    final LeafReader segmentReader = context.reader();
    final FieldInfo fieldInfo = segmentReader.getFieldInfos().fieldInfo(field);
    // The field may be absent from this segment; only validate it when it exists.
    if (null != fieldInfo) {
        Geo3DDocValuesField.checkCompatible(fieldInfo);
    }
    this.currentDocs = DocValues.getSortedNumeric(segmentReader, field);
    return this;
}
Also used : LeafReader(org.apache.lucene.index.LeafReader) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 19 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project stanbol by apache.

the class IndexConfiguration method processFstConfig.

/**
     * This method combines the {@link #fstConfig} with the data present in the
     * {@link SolrCore}.
     * <p>
     * As information for fields are only available when a
     * field was actually used by a document stored in the index one needs to
     * inspect the index after every change.
     * <p>
     * An empty Solr index will result in
     * an empty {@link #corpusInfos} map. The first document with a value
     * for the English field will cause a {@link CorpusInfo} for the English
     * language to be created. As soon as the last document with a label for
     * a given language is deleted the {@link CorpusInfo} for that language
     * will also disappear.
     * <p>
     * Locking: the method first takes the read lock of {@link #corpusInfoLock}
     * to compare the parsed index version with the stored one and returns
     * early if they are equal. Otherwise it acquires the write lock on
     * {@link #corpusInfoLock} while it inspects the Solr index and rebuilds
     * {@link #corpusInfos}.
     * <p>
     * Processing happens in two steps: (1) if wildcards are enabled in the
     * {@link #fstConfig}, all fields present in the index are matched against
     * the default index field; (2) all explicitly included languages are
     * processed with their language specific parameters.
     * @param indexVersion the current version of the {@link #index} to process
     * the FST config for.
     * @param indexReader The {@link AtomicReader} has access to the actual
     * fields present in the {@link SolrCore}. It is used to compare field
     * configurations in the {@link #fstConfig} with fields present in the Solr
     * {@link #index}.
     * @return If any {@link CorpusInfo FST configuration} were found during
     * inspecting the Solr {@link #index}. When the index version is unchanged
     * this is <code>!corpusInfos.isEmpty()</code>.
     */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
    //first check if the Solr index was updated
    corpusInfoLock.readLock().lock();
    try {
        if (indexVersion == this.indexVersion) {
            //nothing to do
            // NOTE(review): this dereferences corpusInfos, while the code below
            // treats corpusInfos == null as the "first call" state - confirm the
            // initial indexVersion can never equal a real index version.
            return !corpusInfos.isEmpty();
        }
    } finally {
        corpusInfoLock.readLock().unlock();
    }
    // NOTE(review): corpusInfos is read here without holding either lock.
    log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
    boolean foundCorpus = false;
    corpusInfoLock.writeLock().lock();
    try {
        this.indexVersion = indexVersion;
        IndexSchema schema = index.getLatestSchema();
        // snapshot of the previous CorpusInfos (by language) so that unchanged
        // instances can be re-added instead of being re-created
        Map<String, CorpusInfo> corpusInfosCopy;
        if (corpusInfos == null) {
            //first call
            //init the field
            corpusInfos = new HashMap<String, CorpusInfo>();
            corpusInfosCopy = new HashMap<String, CorpusInfo>();
        } else {
            corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
            //clear the old data
            corpusInfos.clear();
        }
        //(0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String, String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if (storeField == null) {
            //apply indexField as default if indexField is NOT NULL
            //NOTE: this runs before the DEFAULT_FIELD fallback below, so
            //storeField stays null if no index field was configured
            storeField = indexField;
        }
        if (indexField == null) {
            //apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if (fstName == null) {
            //use default
            fstName = getDefaultFstFileName(indexField);
        }
        //These are all fields actually present in the index (as distinguished
        //from those defined in the schema). This also includes actual instances
        //of dynamic field definitions in the schema.
        //we need this twice
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        //NOTE: this needs only to be done if wildcards are enabled in the fstConfig
        if (fstConfig.useWildcard()) {
            //(1.a) search for present FST files in the FST directory
            // NOTE(review): presentFstFiles is filled here but not read
            // anywhere else in this method.
            Map<String, File> presentFstFiles = new HashMap<String, File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while (fstFiles.hasNext()) {
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                //files are named such as "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            //(1.b) iterate over the fields in the Solr index and search for 
            //      matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
            for (FieldInfo fieldInfo : fieldInfos) {
                //try to match the field names against the wildcard
                if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
                    //for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
                    if (//successfully parsed language
                    language != null && //is current language is enabled? 
                    fstConfig.isLanguage(language) && //is there no explicit configuration for this language?
                    !fstConfig.getExplicitlyIncluded().contains(language)) {
                        //generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if (!language.isEmpty()) {
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory, fstFileName.toString());
                        //get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if (fieldType != null) {
                            //if the fieldType is present
                            if (runtimeGeneration || fstFile.isFile()) {
                                //and FST is present or can be created
                                //we need also to check if the stored field with
                                //the labels is present
                                //get the stored Field and check if it is present!
                                String storeFieldName;
                                if (storeField == null) {
                                    //storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else {
                                    // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if (storedFieldInfos == null) {
                                        log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                                        // null marks this language as not usable
                                        storeFieldName = null;
                                    }
                                }
                                if (storeFieldName != null) {
                                    // == valid configuration
                                    CorpusInfo fstInfo = corpusInfosCopy.get(language);
                                    // (re-)create the CorpusInfo if there was none
                                    // before, or if its indexed/stored field changed
                                    if (//new one
                                    fstInfo == null || //index field compatible
                                    !fstInfo.indexedField.equals(fieldInfo.name) || !fstInfo.storedField.equals(storeFieldName)) {
                                        //store field compatible
                                        CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
                                        log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
                                        addCorpusInfo(newFstInfo);
                                        corpusInfosCopy.put(language, newFstInfo);
                                    } else {
                                        //no change in the SolrIndex ... use the existing CorpusInfo
                                        addCorpusInfo(fstInfo);
                                    }
                                    foundCorpus = true;
                                }
                            } else {
                                log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
                            }
                        } else {
                            log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                        }
                    }
                //else the field matched the wildcard, but has not passed the
                //encoding test.
                }
            //Solr field does not match the field definition in the config
            }
        // end iterate over all fields in the SolrIndex
        }
        //(2) process explicit configuration for configured languages
        for (String language : fstConfig.getExplicitlyIncluded()) {
            //(2.a) get the language specific config (with fallback to default)
            Map<String, String> config = fstConfig.getParameters(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if (langIndexField != null) {
                //also consider explicit field names as default for the fst name
                if (langFstFileName == null) {
                    StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
                    if (!language.isEmpty()) {
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if (langStoreField == null) {
                //fallbacks
                if (storeField != null) {
                    //first to default store field
                    langStoreField = storeField;
                } else {
                    //else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if (langFstFileName == null) {
                //no fstFileName config
                // ... use the default
                // NOTE(review): unlike the two other places that build an FST
                // file name in this method, this one appends '.' + language
                // even when language is empty (yielding "{name}..fst") -
                // confirm whether that is intended.
                langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
            }
            if (langAllowCreationString != null) {
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                //fall back to the runtimeGeneration setting
                langAllowCreation = runtimeGeneration;
            }
            //(2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if (langIndexFieldInfo != null) {
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if (langStoreFieldInfo != null) {
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if (fieldType != null) {
                        //(2.c) check the FST file
                        File langFstFile = new File(fstDirectory, langFstFileName);
                        if (langFstFile.isFile() || langAllowCreation) {
                            CorpusInfo langFstInfo = corpusInfosCopy.get(language);
                            // (re-)create the CorpusInfo if there was none before,
                            // or if its indexed/stored field changed
                            if (//new one
                            langFstInfo == null || //index field compatible
                            !langFstInfo.indexedField.equals(encodedLangIndexField) || !langFstInfo.storedField.equals(encodedLangStoreField)) {
                                //store field compatible
                                CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
                                log.debug("   ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
                                addCorpusInfo(newLangFstInfo);
                            } else {
                                //we can use the existing instance
                                addCorpusInfo(langFstInfo);
                            }
                            foundCorpus = true;
                        } else {
                            log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
                        }
                    } else {
                        log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
                    }
                } else {
                    // NOTE(review): the lookup above used encodedLangStoreField,
                    // but the message reports the un-encoded langStoreField.
                    log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
                }
            } else {
                log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
            }
        }
    } finally {
        corpusInfoLock.writeLock().unlock();
    }
    return foundCorpus;
}
Also used : HashMap(java.util.HashMap) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) FieldType(org.apache.solr.schema.FieldType) FieldInfos(org.apache.lucene.index.FieldInfos) IndexSchema(org.apache.solr.schema.IndexSchema) File(java.io.File) FieldInfo(org.apache.lucene.index.FieldInfo)

Example 20 with FieldInfo

use of org.apache.lucene.index.FieldInfo in project elasticsearch by elastic.

the class MappedFieldType method stats.

/**
     * Builds text field statistics for this field from the given reader.
     *
     * @param reader the index reader to collect the statistics from
     * @return a {@link FieldStats} instance that maps to the type of this
     * field, or {@code null} if the merged field infos of the provided index
     * do not know the current field
     */
public FieldStats stats(IndexReader reader) throws IOException {
    final int totalDocs = reader.maxDoc();
    if (MultiFields.getMergedFieldInfos(reader).fieldInfo(name()) == null) {
        // the index has never seen this field
        return null;
    }
    final Terms fieldTerms = MultiFields.getTerms(reader, name());
    if (fieldTerms != null) {
        return new FieldStats.Text(
                totalDocs,
                fieldTerms.getDocCount(),
                fieldTerms.getSumDocFreq(),
                fieldTerms.getSumTotalTermFreq(),
                isSearchable(),
                isAggregatable(),
                fieldTerms.getMin(),
                fieldTerms.getMax());
    }
    // The field exists but has no terms: report zero documents and -1 for both frequency sums.
    return new FieldStats.Text(totalDocs, 0, -1, -1, isSearchable(), isAggregatable());
}
Also used : Terms(org.apache.lucene.index.Terms) FieldInfo(org.apache.lucene.index.FieldInfo) FieldStats(org.elasticsearch.action.fieldstats.FieldStats)

Aggregations

FieldInfo (org.apache.lucene.index.FieldInfo): 53, BytesRef (org.apache.lucene.util.BytesRef): 13, LeafReader (org.apache.lucene.index.LeafReader): 12, ArrayList (java.util.ArrayList): 10, Terms (org.apache.lucene.index.Terms): 9, TermsEnum (org.apache.lucene.index.TermsEnum): 9, IOException (java.io.IOException): 8, FieldInfos (org.apache.lucene.index.FieldInfos): 8, HashMap (java.util.HashMap): 7, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 7, DocValuesType (org.apache.lucene.index.DocValuesType): 6, PointValues (org.apache.lucene.index.PointValues): 6, IndexOutput (org.apache.lucene.store.IndexOutput): 6, CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 5, SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues): 5, StoredFieldVisitor (org.apache.lucene.index.StoredFieldVisitor): 5, Map (java.util.Map): 4, Document (org.apache.lucene.document.Document): 4, EmptyDocValuesProducer (org.apache.lucene.index.EmptyDocValuesProducer): 4, IndexReader (org.apache.lucene.index.IndexReader): 4