Use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
The class GroupedEndResultTransformer, method transform:
/**
 * {@inheritDoc}
 */
@Override
public void transform(Map<String, ?> result, ResponseBuilder rb, SolrDocumentSource solrDocumentSource) {
  NamedList<Object> commands = new SimpleOrderedMap<>();
  for (Map.Entry<String, ?> entry : result.entrySet()) {
    Object value = entry.getValue();
    if (TopGroups.class.isInstance(value)) {
      @SuppressWarnings("unchecked")
      TopGroups<BytesRef> topGroups = (TopGroups<BytesRef>) value;
      NamedList<Object> command = new SimpleOrderedMap<>();
      command.add("matches", rb.totalHitCount);
      Integer totalGroupCount = rb.mergedGroupCounts.get(entry.getKey());
      if (totalGroupCount != null) {
        command.add("ngroups", totalGroupCount);
      }
      List<NamedList> groups = new ArrayList<>();
      SchemaField groupField = searcher.getSchema().getField(entry.getKey());
      FieldType groupFieldType = groupField.getType();
      for (GroupDocs<BytesRef> group : topGroups.groups) {
        SimpleOrderedMap<Object> groupResult = new SimpleOrderedMap<>();
        if (group.groupValue != null) {
          groupResult.add("groupValue", groupFieldType.toObject(groupField.createField(group.groupValue.utf8ToString())));
        } else {
          groupResult.add("groupValue", null);
        }
        SolrDocumentList docList = new SolrDocumentList();
        docList.setNumFound(group.totalHits);
        if (!Float.isNaN(group.maxScore)) {
          docList.setMaxScore(group.maxScore);
        }
        docList.setStart(rb.getGroupingSpec().getWithinGroupOffset());
        for (ScoreDoc scoreDoc : group.scoreDocs) {
          docList.add(solrDocumentSource.retrieve(scoreDoc));
        }
        groupResult.add("doclist", docList);
        groups.add(groupResult);
      }
      command.add("groups", groups);
      commands.add(entry.getKey(), command);
    } else if (QueryCommandResult.class.isInstance(value)) {
      QueryCommandResult queryCommandResult = (QueryCommandResult) value;
      NamedList<Object> command = new SimpleOrderedMap<>();
      command.add("matches", queryCommandResult.getMatches());
      SolrDocumentList docList = new SolrDocumentList();
      docList.setNumFound(queryCommandResult.getTopDocs().totalHits);
      if (!Float.isNaN(queryCommandResult.getTopDocs().getMaxScore())) {
        docList.setMaxScore(queryCommandResult.getTopDocs().getMaxScore());
      }
      docList.setStart(rb.getGroupingSpec().getWithinGroupOffset());
      for (ScoreDoc scoreDoc : queryCommandResult.getTopDocs().scoreDocs) {
        docList.add(solrDocumentSource.retrieve(scoreDoc));
      }
      command.add("doclist", docList);
      commands.add(entry.getKey(), command);
    }
  }
  rb.rsp.add("grouped", commands);
}
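The "groupValue" handling above is the interesting SchemaField usage: createField rebuilds an IndexableField from the term's string form, and FieldType.toObject converts that back to the field's external, typed value. A minimal sketch of the same round-trip, with hypothetical helper and variable names (only the SchemaField/FieldType calls come from the source):

// Illustrative helper: convert an indexed BytesRef group key back to the
// external value a client would see (String, Integer, Date, ...).
private Object toExternalValue(SolrIndexSearcher searcher, String fieldName, BytesRef groupKey) {
  SchemaField field = searcher.getSchema().getField(fieldName); // throws if the field is not in the schema
  FieldType type = field.getType();
  // createField re-materializes an IndexableField; toObject yields the typed value.
  return type.toObject(field.createField(groupKey.utf8ToString()));
}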
Use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
The class CloudMLTQParser, method parse:
public Query parse() {
  String id = localParams.get(QueryParsing.V);
  // Do a Real Time Get for the document
  SolrDocument doc = getDocument(id);
  if (doc == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch document with id [" + id + "]");
  }
  String[] qf = localParams.getParams("qf");
  Map<String, Float> boostFields = new HashMap<>();
  MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
  mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
  mlt.setMinDocFreq(localParams.getInt("mindf", 0));
  mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
  mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
  mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
  mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
  mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
  Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
  mlt.setBoost(boost);
  mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
  Map<String, Collection<Object>> filteredDocument = new HashMap<>();
  String[] fieldNames;
  if (qf != null) {
    ArrayList<String> fields = new ArrayList<>();
    for (String fieldName : qf) {
      if (!StringUtils.isEmpty(fieldName)) {
        String[] strings = splitList.split(fieldName);
        for (String string : strings) {
          if (!StringUtils.isEmpty(string)) {
            fields.add(string);
          }
        }
      }
    }
    // Parse field names and boosts from the fields
    boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
    fieldNames = boostFields.keySet().toArray(new String[0]);
  } else {
    ArrayList<String> fields = new ArrayList<>();
    for (String field : doc.getFieldNames()) {
      // Only use fields that are stored and have an explicit analyzer.
      // This makes sense as the query uses tf/idf/.. for query construction.
      // We might want to relook and change this in the future though.
      SchemaField f = req.getSchema().getFieldOrNull(field);
      if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
        fields.add(field);
      }
    }
    fieldNames = fields.toArray(new String[0]);
  }
  if (fieldNames.length < 1) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
  }
  mlt.setFieldNames(fieldNames);
  for (String field : fieldNames) {
    Collection<Object> fieldValues = doc.getFieldValues(field);
    if (fieldValues != null) {
      Collection<Object> values = new ArrayList<>();
      for (Object val : fieldValues) {
        if (val instanceof IndexableField) {
          values.add(((IndexableField) val).stringValue());
        } else {
          values.add(val);
        }
      }
      filteredDocument.put(field, values);
    }
  }
  try {
    Query rawMLTQuery = mlt.like(filteredDocument);
    BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
    if (boost && boostFields.size() > 0) {
      BooleanQuery.Builder newQ = new BooleanQuery.Builder();
      newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
      for (BooleanClause clause : boostedMLTQuery) {
        Query q = clause.getQuery();
        float originalBoost = 1f;
        if (q instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) q;
          q = bq.getQuery();
          originalBoost = bq.getBoost();
        }
        Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
        q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
        newQ.add(q, clause.getOccur());
      }
      boostedMLTQuery = newQ.build();
    }
    // exclude current document from results
    BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
    realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
    realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
    return realMLTQuery.build();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request: " + e.getMessage(), e);
  }
}
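This parser is reached through the mlt query parser syntax. Assuming the default registration name and illustrative field names, a request might look like:

q={!mlt qf=title^2,description mintf=2 mindf=1 boost=true}docid-42

Here docid-42 is the uniqueKey value read from QueryParsing.V and fetched via real-time get, qf carries optional per-field boosts, and the remaining local params map directly onto the MoreLikeThis setters above.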
Use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
The class SimpleMLTQParser, method parse:
public Query parse() {
  String defaultField = req.getSchema().getUniqueKeyField().getName();
  String uniqueValue = localParams.get(QueryParsing.V);
  String[] qf = localParams.getParams("qf");
  SolrIndexSearcher searcher = req.getSearcher();
  Query docIdQuery = createIdQuery(defaultField, uniqueValue);
  Map<String, Float> boostFields = new HashMap<>();
  try {
    TopDocs td = searcher.search(docIdQuery, 1);
    if (td.totalHits != 1) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch document with id [" + uniqueValue + "]");
    }
    ScoreDoc[] scoreDocs = td.scoreDocs;
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    mlt.setMinDocFreq(localParams.getInt("mindf", MoreLikeThis.DEFAULT_MIN_DOC_FREQ));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    Boolean boost = localParams.getBool("boost", false);
    mlt.setBoost(boost);
    String[] fieldNames;
    if (qf != null) {
      ArrayList<String> fields = new ArrayList<>();
      for (String fieldName : qf) {
        if (!StringUtils.isEmpty(fieldName)) {
          String[] strings = splitList.split(fieldName);
          for (String string : strings) {
            if (!StringUtils.isEmpty(string)) {
              fields.add(string);
            }
          }
        }
      }
      // Parse field names and boosts from the fields
      boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
      fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
      Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
      ArrayList<String> fields = new ArrayList<>();
      for (String fieldName : fieldDefinitions.keySet()) {
        SchemaField sf = fieldDefinitions.get(fieldName);
        if (sf.indexed() && sf.stored() && sf.getType().getNumberType() == null) {
          fields.add(fieldName);
        }
      }
      fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
    BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
    if (boost && boostFields.size() > 0) {
      BooleanQuery.Builder newQ = new BooleanQuery.Builder();
      newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
      for (BooleanClause clause : boostedMLTQuery) {
        Query q = clause.getQuery();
        float originalBoost = 1f;
        if (q instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) q;
          q = bq.getQuery();
          originalBoost = bq.getBoost();
        }
        Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
        q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
        newQ.add(q, clause.getOccur());
      }
      boostedMLTQuery = newQ.build();
    }
    // exclude current document from results
    BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
    realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
    realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
    return realMLTQuery.build();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request: " + e.getMessage(), e);
  }
}
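Both MLT parsers delegate the field^boost syntax to SolrPluginUtils.parseFieldBoosts, whose convention is that fields listed without an explicit boost map to null; that is why the rewrite loops above check fieldBoost != null before wrapping a clause in a BoostQuery. A small sketch with illustrative values:

// "title^2.0" carries an explicit boost; "description" does not and maps to null.
Map<String, Float> boosts = SolrPluginUtils.parseFieldBoosts(new String[] { "title^2.0", "description" });
// boosts.get("title")       -> 2.0f
// boosts.get("description") -> null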
Use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
The class ChildFieldValueSourceParser, method parse:
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
  final String sortFieldName = fp.parseArg();
  final Query query;
  if (fp.hasMoreArguments()) {
    query = fp.parseNestedQuery();
  } else {
    query = fp.subQuery(fp.getParam(CommonParams.Q), BlockJoinParentQParserPlugin.NAME).getQuery();
  }
  BitSetProducer parentFilter;
  BitSetProducer childFilter;
  SchemaField sf;
  try {
    AllParentsAware bjQ;
    if (!(query instanceof AllParentsAware)) {
      throw new SyntaxError("expect a reference to block join query " + AllParentsAware.class.getSimpleName() + " in " + fp.getString());
    }
    bjQ = (AllParentsAware) query;
    parentFilter = BlockJoinParentQParser.getCachedFilter(fp.getReq(), bjQ.getParentQuery()).filter;
    childFilter = BlockJoinParentQParser.getCachedFilter(fp.getReq(), bjQ.getChildQuery()).filter;
    if (sortFieldName == null || sortFieldName.equals("")) {
      throw new SyntaxError("field is omitted in " + fp.getString());
    }
    sf = fp.getReq().getSchema().getFieldOrNull(sortFieldName);
    if (null == sf) {
      throw new SyntaxError(NAME + " sort param field \"" + sortFieldName + "\" can't be found in schema");
    }
  } catch (SyntaxError e) {
    log.error("can't parse " + fp.getString(), e);
    throw e;
  }
  return new BlockJoinSortFieldValueSource(childFilter, parentFilter, sf);
}
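This parser backs the childfield(...) function, used to sort parent documents by a field of their child documents inside a block join. With illustrative field and query names, a request could look like:

q={!parent which=type_s:parent}comment_t:good&sort=childfield(rating_i,$q) asc

The referenced query must be an AllParentsAware block join, and rating_i must resolve via getFieldOrNull, otherwise the SyntaxErrors shown above are thrown.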
Use of org.apache.solr.schema.SchemaField in project lucene-solr by apache.
The class CarrotClusteringEngine, method init:
@Override
@SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
  this.core = core;
  String result = super.init(config, core);
  final SolrParams initParams = SolrParams.toSolrParams(config);
  // Initialization attributes for Carrot2 controller.
  HashMap<String, Object> initAttributes = new HashMap<>();
  // Customize Carrot2's resource lookup to first look for resources
  // using Solr's resource loader. If that fails, try loading from the classpath.
  ResourceLookup resourceLookup = new ResourceLookup(
      // Solr-specific resource loading.
      new SolrResourceLocator(core, initParams),
      // Using the class loader directly because this time we want to omit the prefix.
      new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
  DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(resourceLookup);
  // Make sure the requested Carrot2 clustering algorithm class is available
  String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
  try {
    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
  } catch (SolrException s) {
    if (!(s.getCause() instanceof ClassNotFoundException)) {
      throw s;
    }
  }
  // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
  // of this component. This by-name convention lookup is used to simplify configuring algorithms.
  String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
  log.info("Initializing Clustering Engine '" + MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
  if (!Strings.isNullOrEmpty(componentName)) {
    IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
    if (attributeXmls.length > 0) {
      if (attributeXmls.length > 1) {
        log.warn("More than one attribute file found, first one will be used: " + Arrays.toString(attributeXmls));
      }
      Thread ct = Thread.currentThread();
      ClassLoader prev = ct.getContextClassLoader();
      try {
        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
        AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
        AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
        initAttributes.putAll(defaultSet.getAttributeValues());
      } catch (Exception e) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Could not read attributes XML for clustering component: " + componentName, e);
      } finally {
        ct.setContextClassLoader(prev);
      }
    }
  }
  // Extract solrconfig attributes, they take precedence.
  extractCarrotAttributes(initParams, initAttributes);
  // Customize the stemmer and tokenizer factories. The implementations we provide here
  // are included in the code base of Solr, so that it's possible to refactor
  // the Lucene APIs the factories rely on if needed.
  // Additionally, we set a custom lexical resource factory for Carrot2 that
  // will use both Carrot2 default stop words as well as stop words from
  // the StopFilter defined on the field.
  final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
  attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
  if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
    attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
  }
  if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
    attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
  }
  // Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
  initAttributes.put("solrCore", core);
  // Carrot2 uses current thread's context class loader to get
  // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
  // To make sure classes from contrib JARs are available,
  // we swap the context class loader for the time of clustering.
  Thread ct = Thread.currentThread();
  ClassLoader prev = ct.getContextClassLoader();
  try {
    ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
    this.controller.init(initAttributes);
  } finally {
    ct.setContextClassLoader(prev);
  }
  SchemaField uniqueField = core.getLatestSchema().getUniqueKeyField();
  if (uniqueField == null) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotClusteringEngine.class.getSimpleName() + " requires the schema to have a uniqueKeyField");
  }
  this.idFieldName = uniqueField.getName();
  return result;
}
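For orientation, an engine like this is wired up in solrconfig.xml; a minimal sketch (the component wiring and algorithm choice are illustrative, not taken from this source):

<searchComponent name="clustering" class="solr.clustering.ClusteringComponent">
  <lst name="engine">
    <str name="name">lingo</str>
    <!-- Must be resolvable through core.getResourceLoader().findClass(...) above. -->
    <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
  </lst>
</searchComponent>

With name set to lingo, the by-name lookup in init() will also pick up a lingo-attributes.xml exported from the Carrot2 Workbench, if one is present on the resource path.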