use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
the class AnalyzingInfixLookupFactory method create.
/**
 * Creates an {@link AnalyzingInfixSuggester} from the suggester configuration.
 * <p>
 * The {@code QUERY_ANALYZER} parameter is mandatory and must name a field type of the
 * core's latest schema; the index and query analyzers of that type are handed to the
 * suggester. {@code INDEX_PATH}, {@code MIN_PREFIX_CHARS}, {@code ALL_TERMS_REQUIRED} and
 * {@code HIGHLIGHT} are optional and fall back to their respective defaults.
 *
 * @param params the suggester configuration
 * @param core the core providing the schema and the data directory
 * @return the configured suggester
 * @throws IllegalArgumentException if {@code QUERY_ANALYZER} is missing or names a field
 *         type that is not defined in the schema
 * @throws RuntimeException wrapping any {@link IOException} raised while opening the
 *         directory or constructing the suggester
 */
@Override
public Lookup create(NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();

  // optional parameters
  Object indexPathParam = params.get(INDEX_PATH);
  String indexPath = indexPathParam != null ? indexPathParam.toString() : DEFAULT_INDEX_PATH;
  if (!new File(indexPath).isAbsolute()) {
    // relative index paths are resolved against the data directory of the core
    indexPath = core.getDataDir() + File.separator + indexPath;
  }

  Object minPrefixCharsParam = params.get(MIN_PREFIX_CHARS);
  int minPrefixChars = minPrefixCharsParam != null
      ? Integer.parseInt(minPrefixCharsParam.toString())
      : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;

  // NOTE: Boolean.parseBoolean is required here. Boolean.getBoolean(name) looks up a JVM
  // system property called 'name' and therefore ignores the configured value.
  Object allTermsRequiredParam = params.get(ALL_TERMS_REQUIRED);
  boolean allTermsRequired = allTermsRequiredParam != null
      ? Boolean.parseBoolean(allTermsRequiredParam.toString())
      : AnalyzingInfixSuggester.DEFAULT_ALL_TERMS_REQUIRED;

  Object highlightParam = params.get(HIGHLIGHT);
  boolean highlight = highlightParam != null
      ? Boolean.parseBoolean(highlightParam.toString())
      : AnalyzingInfixSuggester.DEFAULT_HIGHLIGHT;

  try {
    return new AnalyzingInfixSuggester(FSDirectory.open(new File(indexPath).toPath()), indexAnalyzer, queryAnalyzer, minPrefixChars, true, allTermsRequired, highlight) {
      @Override
      public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
        List<LookupResult> res = super.lookup(key, contexts, num, allTermsRequired, doHighlight);
        if (doHighlight) {
          // re-wrap every hit so that its key is the string form of the highlighted key
          List<LookupResult> res2 = new ArrayList<>(res.size());
          for (LookupResult hit : res) {
            res2.add(new LookupResult(hit.highlightKey.toString(), hit.highlightKey, hit.value, hit.payload, hit.contexts));
          }
          res = res2;
        }
        return res;
      }
    };
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
the class SolrSpellChecker method init.
/**
 * Initializes this spell checker from its configuration.
 * <p>
 * Reads the dictionary name (falling back to {@code DEFAULT_DICTIONARY_NAME}), the
 * source field and the field type name. The query analyzer is taken from the type of
 * the configured field when the schema can resolve it; a configured field type that is
 * known to the schema takes precedence over that. If no analyzer is set afterwards a
 * {@link WhitespaceAnalyzer} is used.
 *
 * @param config the spell checker configuration
 * @param core the core whose latest schema is consulted
 * @return the name of the dictionary
 */
public String init(NamedList config, SolrCore core) {
  final String configuredName = (String) config.get(DICTIONARY_NAME);
  name = (configuredName != null) ? configuredName : DEFAULT_DICTIONARY_NAME;

  field = (String) config.get(FIELD);
  final IndexSchema schema = core.getLatestSchema();

  // analyzer of the configured field, if the schema is able to resolve its type
  final boolean fieldTypeResolvable = field != null && schema.getFieldTypeNoEx(field) != null;
  if (fieldTypeResolvable) {
    analyzer = schema.getFieldType(field).getQueryAnalyzer();
  }

  // an explicitly configured (and known) field type overrides the field's analyzer
  fieldTypeName = (String) config.get(FIELD_TYPE);
  if (schema.getFieldTypes().containsKey(fieldTypeName)) {
    analyzer = schema.getFieldTypes().get(fieldTypeName).getQueryAnalyzer();
  }

  // last resort
  if (analyzer == null) {
    analyzer = new WhitespaceAnalyzer();
  }
  return name;
}
use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
the class FreeTextLookupFactory method create.
/**
 * Creates a {@link FreeTextSuggester} from the suggester configuration.
 * <p>
 * The {@code QUERY_ANALYZER} parameter is mandatory and must name a field type of the
 * core's latest schema. {@code NGRAMS} and {@code SEPARATOR} are optional and fall back
 * to {@link FreeTextSuggester#DEFAULT_GRAMS} and
 * {@link FreeTextSuggester#DEFAULT_SEPARATOR}. Only the first byte of the UTF-8 encoded
 * {@code SEPARATOR} value is used.
 *
 * @param params the suggester configuration
 * @param core the core providing the schema
 * @return the configured suggester
 * @throws IllegalArgumentException if {@code QUERY_ANALYZER} is missing, names a field
 *         type that is not defined in the schema, or if {@code SEPARATOR} is configured
 *         with an empty value
 */
@Override
public Lookup create(NamedList params, SolrCore core) {
  // mandatory parameter
  Object fieldTypeName = params.get(QUERY_ANALYZER);
  if (fieldTypeName == null) {
    throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
  }
  FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
  if (ft == null) {
    throw new IllegalArgumentException("Error in configuration: " + fieldTypeName.toString() + " is not defined in the schema");
  }
  Analyzer indexAnalyzer = ft.getIndexAnalyzer();
  Analyzer queryAnalyzer = ft.getQueryAnalyzer();

  // optional parameters
  Object gramsParam = params.get(NGRAMS);
  int grams = (gramsParam != null) ? Integer.parseInt(gramsParam.toString()) : FreeTextSuggester.DEFAULT_GRAMS;

  byte separator = FreeTextSuggester.DEFAULT_SEPARATOR;
  Object separatorParam = params.get(SEPARATOR);
  if (separatorParam != null) {
    byte[] separatorBytes = separatorParam.toString().getBytes(StandardCharsets.UTF_8);
    if (separatorBytes.length == 0) {
      // an empty value would otherwise surface as a bare ArrayIndexOutOfBoundsException
      throw new IllegalArgumentException("Error in configuration: " + SEPARATOR + " parameter must not be empty");
    }
    separator = separatorBytes[0];
  }
  return new FreeTextSuggester(indexAnalyzer, queryAnalyzer, grams, separator);
}
use of org.apache.solr.schema.FieldType in project stanbol by apache.
the class IndexConfiguration method processFstConfig.
/**
 * Combines the {@link #fstConfig} with the fields actually present in the
 * {@link SolrCore} and rebuilds the {@link #corpusInfos} map from the result.
 * <p>
 * As information about fields is only available when a field was actually
 * used by a document stored in the index, one needs to inspect the index
 * after every change.
 * <p>
 * An empty Solr index will result in an empty {@link #corpusInfos} map. The
 * first document with a value for the English field will cause a
 * {@link CorpusInfo} for the English language to be created. As soon as the
 * last document with a label for a given language is deleted, the
 * {@link CorpusInfo} for that language will also disappear.
 * <p>
 * If the passed index version equals the version processed by the previous
 * call, the method returns early while holding only the read lock of
 * {@link #corpusInfoLock}. Otherwise it acquires the write lock of
 * {@link #corpusInfoLock} while it inspects the Solr index.
 * @param indexVersion the current version of the {@link #index} to process
 * the FST config for.
 * @param indexReader The {@link AtomicReader} has access to the actual
 * fields present in the {@link SolrCore}. It is used to compare field
 * configurations in the {@link #fstConfig} with fields present in the Solr
 * {@link #index}.
 * @return <code>true</code> if any {@link CorpusInfo FST configuration} was
 * found while inspecting the Solr {@link #index} (or, if the index version
 * did not change, if {@link #corpusInfos} is not empty)
 */
private boolean processFstConfig(long indexVersion, AtomicReader indexReader) {
//first check if the Solr index was updated
corpusInfoLock.readLock().lock();
try {
if (indexVersion == this.indexVersion) {
//nothing to do
//NOTE(review): corpusInfos is only initialised further below (under the
//write lock). This assumes the stored indexVersion can never equal the
//passed one before the first full run - confirm.
return !corpusInfos.isEmpty();
}
} finally {
corpusInfoLock.readLock().unlock();
}
//NOTE(review): corpusInfos is read here without holding either lock; the
//value is only used for the log message.
log.debug("> {} FST config for {} (FST dir: {})", corpusInfos == null ? "create" : "update", index.getName(), fstDirectory.getAbsolutePath());
boolean foundCorpus = false;
corpusInfoLock.writeLock().lock();
try {
this.indexVersion = indexVersion;
IndexSchema schema = index.getLatestSchema();
//snapshot of the CorpusInfos of the previous run; used to re-use
//instances whose index and store field did not change
Map<String, CorpusInfo> corpusInfosCopy;
if (corpusInfos == null) {
//first call
//init the field
corpusInfos = new HashMap<String, CorpusInfo>();
corpusInfosCopy = new HashMap<String, CorpusInfo>();
} else {
corpusInfosCopy = new HashMap<String, CorpusInfo>(corpusInfos);
//clear the old data
corpusInfos.clear();
}
//(0) get basic parameters of the default configuration
log.debug(" - default config");
Map<String, String> defaultParams = fstConfig.getDefaultParameters();
String fstName = defaultParams.get(IndexConfiguration.PARAM_FST);
String indexField = defaultParams.get(IndexConfiguration.PARAM_FIELD);
String storeField = defaultParams.get(IndexConfiguration.PARAM_STORE_FIELD);
if (storeField == null) {
//use the configured indexField as default. NOTE: this happens before
//the DEFAULT_FIELD fallback below, so storeField stays null if no
//indexField is configured
storeField = indexField;
}
if (indexField == null) {
//apply the defaults if null
indexField = IndexConfiguration.DEFAULT_FIELD;
}
if (fstName == null) {
//use default
fstName = getDefaultFstFileName(indexField);
}
//These are all fields actually present in the index (as opposed to
//those defined in the schema). This also includes actual instances of
//dynamic field definitions in the schema.
//we need this twice: for the wildcard matching (1) and for the
//explicitly configured languages (2)
FieldInfos fieldInfos = indexReader.getFieldInfos();
//NOTE: this needs only to be done if wildcards are enabled in the fstConfig
if (fstConfig.useWildcard()) {
//(1.a) search for present FST files in the FST directory
//NOTE(review): presentFstFiles is filled here but not read again
//within this method
Map<String, File> presentFstFiles = new HashMap<String, File>();
WildcardFileFilter fstFilter = new WildcardFileFilter(fstName + ".*.fst");
Iterator<File> fstFiles = FileUtils.iterateFiles(fstDirectory, fstFilter, null);
while (fstFiles.hasNext()) {
File fstFile = fstFiles.next();
String fstFileName = fstFile.getName();
//files are named such as "{name}.{lang}.fst"
String language = FilenameUtils.getExtension(FilenameUtils.getBaseName(fstFileName));
presentFstFiles.put(language, fstFile);
}
//(1.b) iterate over the fields in the Solr index and search for
// matches against the configured indexField name
String fieldWildcard = FieldEncodingEnum.encodeLanguage(indexField, fieldEncoding, "*");
for (FieldInfo fieldInfo : fieldInfos) {
//try to match the field names against the wildcard
if (FilenameUtils.wildcardMatch(fieldInfo.name, fieldWildcard)) {
//for matches parse the language from the field name
String language = FieldEncodingEnum.parseLanguage(fieldInfo.name, fieldEncoding, indexField);
if (//successfully parsed language
language != null && //is the current language enabled?
fstConfig.isLanguage(language) && //is there no explicit configuration for this language?
!fstConfig.getExplicitlyIncluded().contains(language)) {
//generate the FST file name
StringBuilder fstFileName = new StringBuilder(fstName);
if (!language.isEmpty()) {
fstFileName.append('.').append(language);
}
fstFileName.append(".fst");
File fstFile = new File(fstDirectory, fstFileName.toString());
//get the FieldType of the field from the Solr schema
FieldType fieldType = schema.getFieldTypeNoEx(fieldInfo.name);
if (fieldType != null) {
//if the fieldType is present
if (runtimeGeneration || fstFile.isFile()) {
//and FST is present or can be created
//we need also to check if the stored field with
//the labels is present
//get the stored Field and check if it is present!
String storeFieldName;
if (storeField == null) {
//storeField == indexField
storeFieldName = fieldInfo.name;
} else {
// check that the storeField is present in the index
storeFieldName = FieldEncodingEnum.encodeLanguage(storeField, fieldEncoding, language);
FieldInfo storedFieldInfos = fieldInfos.fieldInfo(storeFieldName);
if (storedFieldInfos == null) {
log.debug(" ... ignore language {} because Stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, storeFieldName, fieldInfo.name });
//null marks the configuration of this language as invalid
storeFieldName = null;
}
}
if (storeFieldName != null) {
// == valid configuration
CorpusInfo fstInfo = corpusInfosCopy.get(language);
if (//new one
fstInfo == null || //index field changed
!fstInfo.indexedField.equals(fieldInfo.name) || !fstInfo.storedField.equals(storeFieldName)) {
//new language, or index/store field changed: (re)create the CorpusInfo
CorpusInfo newFstInfo = new CorpusInfo(language, fieldInfo.name, storeFieldName, fieldType, fstFile, runtimeGeneration);
log.debug(" ... {} {} ", fstInfo == null ? "create" : "update", newFstInfo);
addCorpusInfo(newFstInfo);
corpusInfosCopy.put(language, newFstInfo);
} else {
//no change in the SolrIndex ... use the existing CorpusInfo
addCorpusInfo(fstInfo);
}
foundCorpus = true;
}
} else {
log.debug(" ... ignore language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, fieldInfo.name, fstFile.getAbsolutePath() });
}
} else {
log.debug(" ... ignore language {} becuase unknown fieldtype " + "for SolrFied {}", language, fieldInfo.name);
}
}
//else the field matched the wildcard, but has not passed the
//encoding test.
}
//else the Solr field does not match the field definition in the config
}
// end iterate over all fields in the SolrIndex
}
//(2) process explicit configuration for configured languages
for (String language : fstConfig.getExplicitlyIncluded()) {
//(2.a) get the language specific config (with fallback to default)
Map<String, String> config = fstConfig.getParameters(language);
String langIndexField = config.get(IndexConfiguration.PARAM_FIELD);
String langStoreField = config.get(IndexConfiguration.PARAM_STORE_FIELD);
String langFstFileName = config.get(IndexConfiguration.PARAM_FST);
final boolean langAllowCreation;
final String langAllowCreationString = config.get(IndexConfiguration.PARAM_RUNTIME_GENERATION);
if (langIndexField != null) {
//also consider explicit field names as default for the fst name
if (langFstFileName == null) {
StringBuilder fileName = new StringBuilder(getDefaultFstFileName(langIndexField));
if (!language.isEmpty()) {
fileName.append('.').append(language);
}
fileName.append(".fst");
langFstFileName = fileName.toString();
}
} else {
//no language specific index field: use the default one
langIndexField = indexField;
}
if (langStoreField == null) {
//fallbacks
if (storeField != null) {
//first to default store field
langStoreField = storeField;
} else {
//else to the lang index field
langStoreField = langIndexField;
}
}
if (langFstFileName == null) {
//no fstFileName config
// ... use the default
langFstFileName = new StringBuilder(fstName).append('.').append(language).append(".fst").toString();
}
if (langAllowCreationString != null) {
langAllowCreation = Boolean.parseBoolean(langAllowCreationString);
} else {
//no language specific setting: use the general one
langAllowCreation = runtimeGeneration;
}
//(2.b) check if the Solr field is present
String encodedLangIndexField = FieldEncodingEnum.encodeLanguage(langIndexField, fieldEncoding, language);
String encodedLangStoreField = FieldEncodingEnum.encodeLanguage(langStoreField, fieldEncoding, language);
FieldInfo langIndexFieldInfo = fieldInfos.fieldInfo(encodedLangIndexField);
if (langIndexFieldInfo != null) {
FieldInfo langStoreFieldInfo = fieldInfos.fieldInfo(encodedLangStoreField);
if (langStoreFieldInfo != null) {
FieldType fieldType = schema.getFieldTypeNoEx(langIndexFieldInfo.name);
if (fieldType != null) {
//(2.c) check the FST file
File langFstFile = new File(fstDirectory, langFstFileName);
if (langFstFile.isFile() || langAllowCreation) {
CorpusInfo langFstInfo = corpusInfosCopy.get(language);
if (//new one
langFstInfo == null || //index field changed
!langFstInfo.indexedField.equals(encodedLangIndexField) || !langFstInfo.storedField.equals(encodedLangStoreField)) {
//new language, or index/store field changed: (re)create the CorpusInfo
CorpusInfo newLangFstInfo = new CorpusInfo(language, encodedLangIndexField, encodedLangStoreField, fieldType, langFstFile, langAllowCreation);
log.debug(" ... {} {} for explicitly configured language", langFstInfo == null ? "create" : "update", newLangFstInfo);
addCorpusInfo(newLangFstInfo);
} else {
//we can use the existing instance
addCorpusInfo(langFstInfo);
}
foundCorpus = true;
} else {
log.debug(" ... ignore explicitly configured language {} (field: {}) because " + "FST file '{}' does not exist and runtime creation " + "is deactivated!", new Object[] { language, langIndexFieldInfo.name, langFstFile.getAbsolutePath() });
}
} else {
log.debug(" ... ignore explicitly configured language {} becuase unknown fieldtype " + "for SolrFied {}", language, langIndexFieldInfo.name);
}
} else {
//NOTE(review): the message reports the un-encoded langStoreField while
//the lookup above used encodedLangStoreField
log.debug(" ... ignore explicitly configured language {} because configured stored Field {} " + "for IndexField {} does not exist! ", new Object[] { language, langStoreField, langIndexFieldInfo.name });
}
} else {
log.debug(" ... ignore explicitly configured language {} because configured field {} (encoded: {}) " + "is not present in the SolrIndex!", new Object[] { language, langIndexField, encodedLangIndexField });
}
}
} finally {
corpusInfoLock.writeLock().unlock();
}
return foundCorpus;
}
use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
the class SpatialHeatmapFacets method getHeatmapForField.
/**
 * Computes the heatmap facet of a single spatial field. Called by
 * {@link org.apache.solr.request.SimpleFacets}.
 * <p>
 * The grid level is either taken from the {@code FACET_HEATMAP_LEVEL} parameter or
 * derived from {@code FACET_HEATMAP_DIST_ERR} / {@code FACET_HEATMAP_DIST_ERR_PCT}.
 * The response holds the grid level, the number of columns and rows, the bounds of the
 * heatmap region and the counts as added by {@code formatCountsAndAddToNL}; the counts
 * are passed on as {@code null} if no cell has a count greater than zero.
 *
 * @param fieldKey the key used to look up the per-field facet parameters
 * @param fieldName the name of the field in the schema
 * @param rb the response builder giving access to the request (schema and searcher)
 * @param params the parameters of the request
 * @param docSet the documents to compute the heatmap for
 * @return the heatmap response for the field
 * @throws SolrException with {@code BAD_REQUEST} if the type of the field is not
 *         supported, the requested grid level is out of range, the resolved distance
 *         error is not positive, or the heatmap computation rejects its arguments
 * @throws IOException as declared by the underlying heatmap computation
 */
public static NamedList<Object> getHeatmapForField(String fieldKey, String fieldName, ResponseBuilder rb, SolrParams params, DocSet docSet) throws IOException {
  // Resolve the prefix tree strategy (and the distance units) from the type of the field.
  // Two instanceof checks are not ideal; refactor if more types need to be supported.
  final FieldType fieldType = rb.req.getSchema().getField(fieldName).getType();
  final PrefixTreeStrategy strategy;
  final DistanceUnits distanceUnits;
  if (fieldType instanceof AbstractSpatialPrefixTreeFieldType) {
    final AbstractSpatialPrefixTreeFieldType prefixTreeType = (AbstractSpatialPrefixTreeFieldType) fieldType;
    strategy = (PrefixTreeStrategy) prefixTreeType.getStrategy(fieldName);
    distanceUnits = prefixTreeType.getDistanceUnits();
  } else if (fieldType instanceof RptWithGeometrySpatialField) {
    final RptWithGeometrySpatialField withGeometryType = (RptWithGeometrySpatialField) fieldType;
    strategy = withGeometryType.getStrategy(fieldName).getIndexStrategy();
    distanceUnits = withGeometryType.getDistanceUnits();
  } else {
    // FYI the term query variant is supported too, but it is rarely used
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "heatmap field needs to be of type " + SpatialRecursivePrefixTreeFieldType.class + " or " + RptWithGeometrySpatialField.class);
  }
  final SpatialContext ctx = strategy.getSpatialContext();

  // The query rectangle: the configured geometry or, if absent, the world bounds.
  final String geomParam = params.getFieldParam(fieldKey, FacetParams.FACET_HEATMAP_GEOM);
  final Shape boundsShape;
  if (geomParam == null) {
    boundsShape = ctx.getWorldBounds();
  } else {
    boundsShape = SpatialUtils.parseGeomSolrException(geomParam, ctx);
  }

  // The grid level: either given explicitly or resolved via distErr / distErrPct.
  final Integer requestedLevel = params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_LEVEL);
  final int maxGridLevel = strategy.getGrid().getMaxLevels();
  final int gridLevel;
  if (requestedLevel == null) {
    // SpatialArgs is only used for its utility to resolve a 'distErr' from the
    // optionally set distErr & distErrPct; the operation itself is ignored.
    final Shape argsShape = (boundsShape == null) ? ctx.getWorldBounds() : boundsShape;
    final SpatialArgs spatialArgs = new SpatialArgs(SpatialOperation.Intersects, argsShape);
    final Double requestedDistErr = params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR);
    if (requestedDistErr != null) {
      // convert the units of distErr based on the configured units
      spatialArgs.setDistErr(requestedDistErr * distanceUnits.multiplierFromThisUnitToDegrees());
    }
    spatialArgs.setDistErrPct(params.getFieldDouble(fieldKey, FacetParams.FACET_HEATMAP_DIST_ERR_PCT));
    final double resolvedDistErr = spatialArgs.resolveDistErr(ctx, DEFAULT_DIST_ERR_PCT);
    if (resolvedDistErr <= 0) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, FacetParams.FACET_HEATMAP_DIST_ERR_PCT + " or " + FacetParams.FACET_HEATMAP_DIST_ERR + " should be > 0 or instead provide " + FacetParams.FACET_HEATMAP_LEVEL + "=" + maxGridLevel + " if you insist on maximum detail");
    }
    // the grid can look up a level satisfying an error distance constraint
    gridLevel = strategy.getGrid().getLevelForDistance(resolvedDistErr);
  } else {
    gridLevel = requestedLevel;
    if (gridLevel <= 0 || gridLevel > maxGridLevel) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, FacetParams.FACET_HEATMAP_LEVEL + " should be > 0 and <= " + maxGridLevel);
    }
  }

  // Compute the heatmap.
  final HeatmapFacetCounter.Heatmap heatmap;
  try {
    heatmap = HeatmapFacetCounter.calcFacets(
        strategy,
        rb.req.getSearcher().getTopReaderContext(),
        getTopAcceptDocs(docSet, rb.req.getSearcher()), // the DocSet turned into Bits
        boundsShape,
        gridLevel,
        params.getFieldInt(fieldKey, FacetParams.FACET_HEATMAP_MAX_CELLS, 100_000)); // throws if exceeded
  } catch (IllegalArgumentException e) {
    // e.g. too many cells
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.toString(), e);
  }

  // Populate the response.
  final NamedList<Object> response = new NamedList<>();
  response.add("gridLevel", gridLevel);
  response.add("columns", heatmap.columns);
  response.add("rows", heatmap.rows);
  response.add("minX", heatmap.region.getMinX());
  response.add("maxX", heatmap.region.getMaxX());
  response.add("minY", heatmap.region.getMinY());
  response.add("maxY", heatmap.region.getMaxY());

  // the counts are only passed on if at least one cell has a hit
  boolean anyCellHit = false;
  for (int cellCount : heatmap.counts) {
    anyCellHit = cellCount > 0;
    if (anyCellHit) {
      break;
    }
  }
  formatCountsAndAddToNL(fieldKey, rb, params, heatmap.columns, heatmap.rows, anyCellHit ? heatmap.counts : null, response);
  return response;
}
Aggregations