use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
the class SolrDocumentFetcher method decorateDocValueFields.
/**
 * Fetches the docValues of the requested fields for a single document and adds them to the
 * given SolrDocument/SolrInputDocument.
 * <p>
 * A field is skipped when it is unknown to the schema, has no docValues, is already present
 * in {@code doc}, is unknown to the searcher's field infos, or has no value for the document.
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 * @throws IOException
 *           if reading the docValues from the index fails
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException {
  // Resolve the top-level docid to the segment (leaf) that holds it and the segment-local id.
  final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
  final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
  final int localId = docid - leafContexts.get(subIndex).docBase;
  final LeafReader leafReader = leafContexts.get(subIndex).reader();
  for (String fieldName : fields) {
    final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
    if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
      log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
      continue;
    }
    FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
    if (fi == null) {
      // Searcher doesn't have info about this field, hence ignore it.
      continue;
    }
    final DocValuesType dvType = fi.getDocValuesType();
    switch (dvType) {
      case NUMERIC:
        final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
        if (ndv == null) {
          continue;
        }
        Long val;
        if (ndv.advanceExact(localId)) {
          val = ndv.longValue();
        } else {
          continue;
        }
        // The raw long is converted to the Java type matching the schema's field type.
        Object newVal = val;
        if (schemaField.getType().isPointField()) {
          // TODO: Maybe merge PointField with TrieFields here
          NumberType type = schemaField.getType().getNumberType();
          switch (type) {
            case INTEGER:
              newVal = val.intValue();
              break;
            case LONG:
              newVal = val.longValue();
              break;
            case FLOAT:
              newVal = Float.intBitsToFloat(val.intValue());
              break;
            case DOUBLE:
              newVal = Double.longBitsToDouble(val);
              break;
            case DATE:
              newVal = new Date(val);
              break;
            default:
              throw new AssertionError("Unexpected PointType: " + type);
          }
        } else {
          if (schemaField.getType() instanceof TrieIntField) {
            newVal = val.intValue();
          } else if (schemaField.getType() instanceof TrieFloatField) {
            newVal = Float.intBitsToFloat(val.intValue());
          } else if (schemaField.getType() instanceof TrieDoubleField) {
            newVal = Double.longBitsToDouble(val);
          } else if (schemaField.getType() instanceof TrieDateField) {
            newVal = new Date(val);
          } else if (schemaField.getType() instanceof EnumField) {
            newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
          }
        }
        doc.addField(fieldName, newVal);
        break;
      case BINARY:
        BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
        if (bdv == null) {
          continue;
        }
        BytesRef value;
        if (bdv.advanceExact(localId)) {
          // Copy the bytes before handing them to the document.
          value = BytesRef.deepCopyOf(bdv.binaryValue());
        } else {
          continue;
        }
        doc.addField(fieldName, value);
        break;
      case SORTED:
        SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
        if (sdv == null) {
          continue;
        }
        if (sdv.advanceExact(localId)) {
          final BytesRef bRef = sdv.binaryValue();
          // Special handling for Boolean fields since they're stored as 'T' and 'F'.
          if (schemaField.getType() instanceof BoolField) {
            doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
          } else {
            doc.addField(fieldName, bRef.utf8ToString());
          }
        }
        break;
      case SORTED_NUMERIC:
        final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
        NumberType type = schemaField.getType().getNumberType();
        if (numericDv != null) {
          if (numericDv.advance(localId) == localId) {
            final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
            for (int i = 0; i < numericDv.docValueCount(); i++) {
              long number = numericDv.nextValue();
              switch (type) {
                case INTEGER:
                  outValues.add((int) number);
                  break;
                case LONG:
                  outValues.add(number);
                  break;
                case FLOAT:
                  outValues.add(NumericUtils.sortableIntToFloat((int) number));
                  break;
                case DOUBLE:
                  outValues.add(NumericUtils.sortableLongToDouble(number));
                  break;
                case DATE:
                  outValues.add(new Date(number));
                  break;
                default:
                  throw new AssertionError("Unexpected PointType: " + type);
              }
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        // Without this break the case fell through into SORTED_SET and performed a second,
        // pointless sorted-set lookup for the same field.
        break;
      case SORTED_SET:
        final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
        if (values != null && values.getValueCount() > 0) {
          if (values.advance(localId) == localId) {
            final List<Object> outValues = new LinkedList<>();
            for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
              final BytesRef term = values.lookupOrd(ord);
              outValues.add(schemaField.getType().toObject(schemaField, term));
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        break;
      case NONE:
        // The field has no docValues in the index: nothing to add.
        break;
    }
  }
}
use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
the class SolrDocumentFetcher method visitFromCached.
/**
 * Replays the fields of a document taken from the document cache through a stored field
 * visitor.
 * <p>
 * For every field the visitor is first asked whether it needs the field; {@code STOP} ends the
 * whole visit and {@code NO} skips the field. Otherwise the value is dispatched as binary,
 * numeric (int, long, float or double) or string, in that order of precedence.
 *
 * @param document the cached document whose fields are visited
 * @param visitor the visitor receiving the field values
 * @throws IOException if the visitor or reading a lazily loaded value fails
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
  for (IndexableField field : document) {
    final FieldInfo fieldInfo = searcher.getFieldInfos().fieldInfo(field.name());
    final StoredFieldVisitor.Status status = visitor.needsField(fieldInfo);
    if (status == StoredFieldVisitor.Status.STOP) {
      return;
    }
    if (status == StoredFieldVisitor.Status.NO) {
      continue;
    }

    final BytesRef bytes = field.binaryValue();
    if (bytes != null) {
      visitor.binaryField(fieldInfo, toByteArrayUnwrapIfPossible(bytes));
    } else {
      final Number number = field.numericValue();
      if (number != null) {
        if (number instanceof Integer) {
          visitor.intField(fieldInfo, number.intValue());
        } else if (number instanceof Long) {
          visitor.longField(fieldInfo, number.longValue());
        } else if (number instanceof Float) {
          visitor.floatField(fieldInfo, number.floatValue());
        } else if (number instanceof Double) {
          visitor.doubleField(fieldInfo, number.doubleValue());
        } else {
          throw new AssertionError();
        }
      } else if (field instanceof LargeLazyField) {
        // Neither binary nor numeric, so it must be a String.
        // Optimization: pass the raw bytes along to avoid premature string conversion.
        final LargeLazyField lazyField = (LargeLazyField) field;
        visitor.stringField(fieldInfo, toByteArrayUnwrapIfPossible(lazyField.readBytes()));
      } else {
        final byte[] utf8 = field.stringValue().getBytes(StandardCharsets.UTF_8);
        visitor.stringField(fieldInfo, utf8);
      }
    }
  }
}
use of org.apache.lucene.index.FieldInfo in project lucene-solr by apache.
the class Geo3DPointDistanceComparator method getLeafComparator.
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
  final LeafReader leaf = context.reader();
  final FieldInfo fieldInfo = leaf.getFieldInfos().fieldInfo(field);
  // The compatibility check is only run when this segment has field info for the field.
  if (fieldInfo != null) {
    Geo3DDocValuesField.checkCompatible(fieldInfo);
  }
  // Keep the segment's sorted-numeric doc values for the field in currentDocs.
  currentDocs = DocValues.getSortedNumeric(leaf, field);
  return this;
}
use of org.apache.lucene.index.FieldInfo in project stanbol by apache.
the class IndexConfiguration method processFstConfig.
/**
 * This method combines the {@link #fstConfig} with the data present in the
 * {@link SolrCore}.
 * <p>
 * As information for fields is only available when a
 * field was actually used by a document stored in the index, one needs to
 * inspect the index after every change.
 * <p>
 * An empty Solr index will result in
 * an empty {@link #corpusInfos} map. The first document with a value
 * for the English field will cause a {@link CorpusInfo} for the English
 * language to be created. As soon as the last document with a label for
 * a given language is deleted, the {@link CorpusInfo} for that language
 * will also disappear.
 * <p>
 * This method acquires a write lock on {@link #corpusInfoLock} while it
 * inspects the Solr index. If the parsed index version equals the version
 * processed last, only the read lock is taken and nothing is re-processed.
 * @param indexVersion the current version of the {@link #index} to process
 * the FST config for.
 * @param indexReader The {@link AtomicReader} has access to the actual
 * fields present in the {@link SolrCore}. It is used to compare field
 * configurations in the {@link #fstConfig} with fields present in the Solr
 * {@link #index}.
 * @return If any {@link CorpusInfo FST configuration} was found during
 * inspecting the Solr {@link #index}
 */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
    //first check if the Solr index was updated
    corpusInfoLock.readLock().lock();
    try {
        if (indexVersion == this.indexVersion) {
            //nothing to do
            return !corpusInfos.isEmpty();
        }
    } finally {
        corpusInfoLock.readLock().unlock();
    }
    log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
    boolean foundCorpus = false;
    corpusInfoLock.writeLock().lock();
    try {
        this.indexVersion = indexVersion;
        IndexSchema schema = index.getLatestSchema();
        //snapshot of the previous state; used to re-use unchanged CorpusInfo instances
        Map<String, CorpusInfo> corpusInfosCopy;
        if (corpusInfos == null) {
            //first call: init the field
            corpusInfos = new HashMap<String, CorpusInfo>();
            corpusInfosCopy = new HashMap<String, CorpusInfo>();
        } else {
            corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
            //clear the old data
            corpusInfos.clear();
        }
        //(0) get basic parameters of the default configuration
        log.debug(" - default config");
        Map<String, String> defaultParams = fstConfig.getDefaultParameters();
        String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
        String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
        String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
        if (storeField == null) {
            //apply indexField as default if indexField is NOT NULL
            //NOTE: this has to happen before the default is applied to indexField
            storeField = indexField;
        }
        if (indexField == null) {
            //apply the defaults if null
            indexField = IndexConfiguration.DEFAULT_FIELD;
        }
        if (fstName == null) {
            //use default
            fstName = getDefaultFstFileName(indexField);
        }
        //These are all fields actually present in the index (as distinguished from
        //those defined in the schema). This also includes actual instances of
        //dynamic field definitions in the schema.
        //we need this twice
        FieldInfos fieldInfos = indexReader.getFieldInfos();
        //NOTE: this needs only to be done if wildcards are enabled in the fstConfig
        if (fstConfig.useWildcard()) {
            //(1.a) search for present FST files in the FST directory
            Map<String, File> presentFstFiles = new HashMap<String, File>();
            WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
            Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
            while (fstFiles.hasNext()) {
                File fstFile = fstFiles.next();
                String fstFileName = fstFile.getName();
                //files are named such as "{name}.{lang}.fst"
                String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
                presentFstFiles.put(language, fstFile);
            }
            //(1.b) iterate over the fields in the Solr index and search for
            //      matches against the configured indexField name
            String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
            for (FieldInfo fieldInfo : fieldInfos) {
                //try to match the field names against the wildcard
                if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
                    //for matches parse the language from the field name
                    String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
                    //process the field if: the language was successfully parsed,
                    //the language is enabled and there is no explicit
                    //configuration for this language (those are handled in (2))
                    if (language != null
                            && fstConfig.isLanguage(language)
                            && !fstConfig.getExplicitlyIncluded().contains(language)) {
                        //generate the FST file name
                        StringBuilder fstFileName = new StringBuilder(fstName);
                        if (!language.isEmpty()) {
                            fstFileName.append('.').append(language);
                        }
                        fstFileName.append(".fst");
                        File fstFile = new File(fstDirectory, fstFileName.toString());
                        //get the FieldType of the field from the Solr schema
                        FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
                        if (fieldType != null) {
                            //if the fieldType is present
                            if (runtimeGeneration || fstFile.isFile()) {
                                //and FST is present or can be created
                                //we need also to check if the stored field with
                                //the labels is present
                                //get the stored Field and check if it is present!
                                String storeFieldName;
                                if (storeField == null) {
                                    //storeField == indexField
                                    storeFieldName = fieldInfo.name;
                                } else {
                                    // check that the storeField is present in the index
                                    storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
                                    FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
                                    if (storedFieldInfos == null) {
                                        log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
                                        storeFieldName = null;
                                    }
                                }
                                if (storeFieldName != null) {
                                    // == valid configuration
                                    CorpusInfo fstInfo = corpusInfosCopy.get(language);
                                    //a new CorpusInfo is needed if there was none or if
                                    //the index field or the store field has changed
                                    if (fstInfo == null
                                            || !fstInfo.indexedField.equals(fieldInfo.name)
                                            || !fstInfo.storedField.equals(storeFieldName)) {
                                        CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
                                        log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
                                        addCorpusInfo(newFstInfo);
                                        corpusInfosCopy.put(language, newFstInfo);
                                    } else {
                                        //no change in the SolrIndex ... use the existing CorpusInfo
                                        addCorpusInfo(fstInfo);
                                    }
                                    foundCorpus = true;
                                }
                            } else {
                                log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
                            }
                        } else {
                            log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
                        }
                    }
                    //else the field matched the wildcard, but has not passed the
                    //encoding test.
                }
                //else the Solr field does not match the field definition in the config
            }
            // end iterate over all fields in the SolrIndex
        }
        //(2) process explicit configuration for configured languages
        for (String language : fstConfig.getExplicitlyIncluded()) {
            //(2.a) get the language specific config (with fallback to default)
            Map<String, String> config = fstConfig.getParameters(language);
            String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
            String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
            String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
            final boolean langAllowCreation;
            final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
            if (langIndexField != null) {
                //also consider explicit field names as default for the fst name
                if (langFstFileName == null) {
                    StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
                    if (!language.isEmpty()) {
                        fileName.append('.').append(language);
                    }
                    fileName.append(".fst");
                    langFstFileName = fileName.toString();
                }
            } else {
                langIndexField = indexField;
            }
            if (langStoreField == null) {
                //fallbacks
                if (storeField != null) {
                    //first to default store field
                    langStoreField = storeField;
                } else {
                    //else to the lang index field
                    langStoreField = langIndexField;
                }
            }
            if (langFstFileName == null) {
                //no fstFileName config ... use the default
                //The language segment is skipped for the empty language so that the
                //name agrees with the other FST file names built by this method
                //("{name}.fst" instead of "{name}..fst").
                StringBuilder fileName = new StringBuilder(fstName);
                if (!language.isEmpty()) {
                    fileName.append('.').append(language);
                }
                fileName.append(".fst");
                langFstFileName = fileName.toString();
            }
            if (langAllowCreationString != null) {
                langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
            } else {
                langAllowCreation = runtimeGeneration;
            }
            //(2.b) check if the Solr field is present
            String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
            String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
            FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
            if (langIndexFieldInfo != null) {
                FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
                if (langStoreFieldInfo != null) {
                    FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
                    if (fieldType != null) {
                        //(2.c) check the FST file
                        File langFstFile = new File(fstDirectory, langFstFileName);
                        if (langFstFile.isFile() || langAllowCreation) {
                            CorpusInfo langFstInfo = corpusInfosCopy.get(language);
                            //a new CorpusInfo is needed if there was none or if
                            //the index field or the store field has changed
                            if (langFstInfo == null
                                    || !langFstInfo.indexedField.equals(encodedLangIndexField)
                                    || !langFstInfo.storedField.equals(encodedLangStoreField)) {
                                CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
                                log.debug(" ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
                                addCorpusInfo(newLangFstInfo);
                            } else {
                                //we can use the existing instance
                                addCorpusInfo(langFstInfo);
                            }
                            foundCorpus = true;
                        } else {
                            log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
                        }
                    } else {
                        log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
                    }
                } else {
                    log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
                }
            } else {
                log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
            }
        }
    } finally {
        corpusInfoLock.writeLock().unlock();
    }
    return foundCorpus;
}
use of org.apache.lucene.index.FieldInfo in project elasticsearch by elastic.
the class MappedFieldType method stats.
/**
 * Builds the field statistics of this field for the given reader.
 *
 * @param reader the index reader to collect the statistics from
 * @return a {@link FieldStats} instance that maps to the type of this
 *         field or {@code null} if the provided index has no stats about the
 *         current field
 * @throws IOException if reading from the index fails
 */
public FieldStats stats(IndexReader reader) throws IOException {
  final int maxDoc = reader.maxDoc();
  final FieldInfo fieldInfo = MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
  if (fieldInfo == null) {
    return null;
  }
  final Terms terms = MultiFields.getTerms(reader, name());
  if (terms != null) {
    return new FieldStats.Text(
        maxDoc,
        terms.getDocCount(),
        terms.getSumDocFreq(),
        terms.getSumTotalTermFreq(),
        isSearchable(),
        isAggregatable(),
        terms.getMin(),
        terms.getMax());
  }
  // The field is known to the index but has no terms: report placeholder counters.
  return new FieldStats.Text(maxDoc, 0, -1, -1, isSearchable(), isAggregatable());
}
Aggregations