Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class SolrQueryParserBase, method getBooleanQuery:
/**
* Factory method for generating a query, given a set of clauses.
* By default creates a boolean query composed of the clauses passed in.
*
* Can be overridden by extending classes to modify the query being
* returned.
*
* @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances
* to join.
*
* @return Resulting {@link org.apache.lucene.search.Query} object.
*/
protected Query getBooleanQuery(List<BooleanClause> clauses) throws SyntaxError {
if (clauses.size() == 0) {
// all clause words were filtered away by the analyzer.
return null;
}
SchemaField sfield = null;
List<RawQuery> fieldValues = null;
boolean onlyRawQueries = true;
int allRawQueriesTermCount = 0;
for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof RawQuery) {
allRawQueriesTermCount += ((RawQuery) clause.getQuery()).getTermCount();
} else {
onlyRawQueries = false;
}
}
boolean useTermsQuery = (flags & QParser.FLAG_FILTER) != 0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
for (BooleanClause clause : clauses) {
Query subq = clause.getQuery();
if (subq instanceof RawQuery) {
if (clause.getOccur() != BooleanClause.Occur.SHOULD) {
// We only collect optional terms for set queries. Since this isn't optional,
// convert the raw query to a normal query and handle as usual.
clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
} else {
// Optional raw query.
RawQuery rawq = (RawQuery) subq;
// only look up fmap and type info on a field change
if (sfield != rawq.sfield) {
sfield = rawq.sfield;
fieldValues = fmap.get(sfield);
// Start collecting values for this field if we intend to build a set query (the useTermsQuery case), or if the field isn't indexed.
if ((fieldValues == null && useTermsQuery) || !sfield.indexed()) {
fieldValues = new ArrayList<>(2);
fmap.put(sfield, fieldValues);
}
}
if (fieldValues != null) {
fieldValues.add(rawq);
continue;
}
clause = new BooleanClause(rawToNormal(subq), clause.getOccur());
}
}
booleanBuilder.add(clause);
}
for (Map.Entry<SchemaField, List<RawQuery>> entry : fmap.entrySet()) {
sfield = entry.getKey();
fieldValues = entry.getValue();
FieldType ft = sfield.getType();
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
// use boolean query instead
for (RawQuery rawq : fieldValues) {
Query subq;
if (ft.isTokenized() && sfield.indexed()) {
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField) ft).getAutoGeneratePhraseQueries();
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField) ft).getEnableGraphQueries();
subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} else {
for (String externalVal : rawq.getExternalVals()) {
subq = ft.getFieldQuery(this.parser, sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
}
}
} else {
List<String> externalVals = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
// if this is everything, don't wrap in a boolean query
if (onlyRawQueries && termCount == allRawQueriesTermCount)
return subq;
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
}
BooleanQuery bq = booleanBuilder.build();
if (bq.clauses().size() == 1) {
// Unwrap single SHOULD query
BooleanClause clause = bq.clauses().iterator().next();
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
return clause.getQuery();
}
}
return bq;
}
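The interesting part of this method is the batching: optional (SHOULD) raw-term clauses are grouped per field, and once the term count crosses TERMS_QUERY_THRESHOLD in filter context they are handed to FieldType.getSetQuery as a single set query rather than many boolean clauses. Below is a minimal standalone sketch of that idea against the plain Lucene API; the class name and threshold value are illustrative, not Solr's.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

public class SetQueryBatchingSketch {

    static final int TERMS_QUERY_THRESHOLD = 16; // illustrative; Solr defines its own constant

    // Collapses many optional terms on one field into a single set query once
    // the threshold is crossed, mirroring the set-query branch above.
    static Query termsOrBoolean(String field, List<String> values) {
        if (values.size() > TERMS_QUERY_THRESHOLD) {
            List<BytesRef> terms = new ArrayList<>();
            for (String v : values) {
                terms.add(new BytesRef(v));
            }
            return new TermInSetQuery(field, terms); // one scan instead of N clauses
        }
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (String v : values) {
            builder.add(new TermQuery(new Term(field, v)), BooleanClause.Occur.SHOULD);
        }
        return builder.build();
    }
}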
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class CSVWriter, method writeResponse:
public void writeResponse() throws IOException {
SolrParams params = req.getParams();
strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false, false, false, true, "\n");
CSVStrategy strat = strategy;
String sep = params.get(CSV_SEPARATOR);
if (sep != null) {
if (sep.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid separator:'" + sep + "'");
strat.setDelimiter(sep.charAt(0));
}
String nl = params.get(CSV_NEWLINE);
if (nl != null) {
if (nl.length() == 0)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid newline:'" + nl + "'");
strat.setPrinterNewline(nl);
}
String encapsulator = params.get(CSV_ENCAPSULATOR);
String escape = params.get(CSV_ESCAPE);
if (encapsulator != null) {
if (encapsulator.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid encapsulator:'" + encapsulator + "'");
strat.setEncapsulator(encapsulator.charAt(0));
}
if (escape != null) {
if (escape.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid escape:'" + escape + "'");
strat.setEscape(escape.charAt(0));
if (encapsulator == null) {
strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
}
}
if (strat.getEscape() == '\\') {
// If the escape is the standard backslash, then also enable
// unicode escapes (it's harmless since 'u' would not otherwise
// be escaped).
strat.setUnicodeEscapeInterpretation(true);
}
printer = new CSVPrinter(writer, strategy);
CSVStrategy mvStrategy = new CSVStrategy(strategy.getDelimiter(), CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '\\', false, false, false, false, "\n");
strat = mvStrategy;
sep = params.get(MV_SEPARATOR);
if (sep != null) {
if (sep.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv separator:'" + sep + "'");
strat.setDelimiter(sep.charAt(0));
}
encapsulator = params.get(MV_ENCAPSULATOR);
escape = params.get(MV_ESCAPE);
if (encapsulator != null) {
if (encapsulator.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv encapsulator:'" + encapsulator + "'");
strat.setEncapsulator(encapsulator.charAt(0));
if (escape == null) {
strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
}
}
if (escape != null) {
if (escape.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv escape:'" + escape + "'");
strat.setEscape(escape.charAt(0));
// encapsulator will already be disabled if it wasn't specified
}
Collection<String> fields = returnFields.getRequestedFieldNames();
Object responseObj = rsp.getResponse();
boolean returnOnlyStored = false;
if (fields == null || returnFields.hasPatternMatching()) {
if (responseObj instanceof SolrDocumentList) {
// get the list of fields from the SolrDocumentList
if (fields == null) {
fields = new LinkedHashSet<>();
}
for (SolrDocument sdoc : (SolrDocumentList) responseObj) {
fields.addAll(sdoc.getFieldNames());
}
} else {
// get the list of fields from the index
Iterable<String> all = req.getSearcher().getFieldNames();
if (fields == null) {
fields = Sets.newHashSet(all);
} else {
Iterables.addAll(fields, all);
}
}
if (returnFields.wantsScore()) {
fields.add("score");
} else {
fields.remove("score");
}
returnOnlyStored = true;
}
CSVSharedBufPrinter csvPrinterMV = new CSVSharedBufPrinter(mvWriter, mvStrategy);
for (String field : fields) {
if (!returnFields.wantsField(field)) {
continue;
}
if (field.equals("score")) {
CSVField csvField = new CSVField();
csvField.name = "score";
csvFields.put("score", csvField);
continue;
}
SchemaField sf = schema.getFieldOrNull(field);
if (sf == null) {
FieldType ft = new StrField();
sf = new SchemaField(field, ft);
}
// Return only stored fields, unless an explicit field list is specified
if (returnOnlyStored && !sf.stored()) {
continue;
}
// check for per-field overrides
sep = params.get("f." + field + '.' + CSV_SEPARATOR);
encapsulator = params.get("f." + field + '.' + CSV_ENCAPSULATOR);
escape = params.get("f." + field + '.' + CSV_ESCAPE);
// if polyfield and no escape is provided, add "\\" escape by default
if (sf.isPolyField()) {
escape = (escape == null) ? "\\" : escape;
}
CSVSharedBufPrinter csvPrinter = csvPrinterMV;
if (sep != null || encapsulator != null || escape != null) {
// create a new strategy + printer if there were any per-field overrides
strat = (CSVStrategy) mvStrategy.clone();
if (sep != null) {
if (sep.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv separator:'" + sep + "'");
strat.setDelimiter(sep.charAt(0));
}
if (encapsulator != null) {
if (encapsulator.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv encapsulator:'" + encapsulator + "'");
strat.setEncapsulator(encapsulator.charAt(0));
if (escape == null) {
strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
}
}
if (escape != null) {
if (escape.length() != 1)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid mv escape:'" + escape + "'");
strat.setEscape(escape.charAt(0));
if (encapsulator == null) {
strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
}
}
csvPrinter = new CSVSharedBufPrinter(mvWriter, strat);
}
CSVField csvField = new CSVField();
csvField.name = field;
csvField.sf = sf;
csvField.mvPrinter = csvPrinter;
csvFields.put(field, csvField);
}
NullValue = params.get(CSV_NULL, "");
if (params.getBool(CSV_HEADER, true)) {
for (CSVField csvField : csvFields.values()) {
printer.print(csvField.name);
}
printer.println();
}
if (responseObj instanceof ResultContext) {
writeDocuments(null, (ResultContext) responseObj);
} else if (responseObj instanceof DocList) {
ResultContext ctx = new BasicResultContext((DocList) responseObj, returnFields, null, null, req);
writeDocuments(null, ctx);
} else if (responseObj instanceof SolrDocumentList) {
writeSolrDocumentList(null, (SolrDocumentList) responseObj, returnFields);
}
}
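For context, the settings parsed above arrive as plain request parameters of the CSV response writer (csv.separator, csv.mv.separator, and the per-field "f.<field>." prefix). Here is a sketch of a SolrJ request exercising the global, multi-value, and per-field overrides; the "tags" field name is made up for the example.

import org.apache.solr.client.solrj.SolrQuery;

public class CsvParamsSketch {
    public static void main(String[] args) {
        SolrQuery q = new SolrQuery("*:*");
        q.set("wt", "csv");
        q.set("csv.separator", ";");          // global column separator
        q.set("csv.mv.separator", "|");       // separator between values of a multi-valued field
        q.set("f.tags.csv.separator", ",");   // per-field override, triggering the cloned strategy
        System.out.println(q);                // prints the encoded parameter string
    }
}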
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class SolrQueryParserBase, method getWildcardQuery:
// called from parser
protected Query getWildcardQuery(String field, String termStr) throws SyntaxError {
checkNullField(field);
// *:* -> MatchAllDocsQuery
if ("*".equals(termStr)) {
if ("*".equals(field) || getExplicitField() == null) {
return newMatchAllDocsQuery();
}
}
FieldType fieldType = schema.getFieldType(field);
termStr = analyzeIfMultitermTermText(field, termStr, fieldType);
// can we use reversed wildcards in this field?
ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
if (factory != null) {
Term term = new Term(field, termStr);
// fsa representing the query
Automaton automaton = WildcardQuery.toAutomaton(term);
// TODO: we should likely use the automaton to calculate shouldReverse, too.
if (factory.shouldReverse(termStr)) {
automaton = Operations.concatenate(automaton, Automata.makeChar(factory.getMarkerChar()));
automaton = Operations.reverse(automaton);
} else {
// the reversed wildcard filter is active: remove false positives
// fsa representing false positives (markerChar*)
Automaton falsePositives = Operations.concatenate(Automata.makeChar(factory.getMarkerChar()), Automata.makeAnyString());
// subtract these away
automaton = Operations.minus(automaton, falsePositives, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
return new AutomatonQuery(term, automaton) {
// override toString so it's completely transparent
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
buffer.append(term.text());
return buffer.toString();
}
};
}
// Solr has always used constant scoring for wildcard queries. This should return constant scoring by default.
return newWildcardQuery(new Term(field, termStr));
}
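The reversal trick deserves a gloss: ReversedWildcardFilter additionally indexes each token as a marker character followed by the reversed token, so a leading-wildcard pattern, which has no usable prefix, becomes a cheap fixed-prefix pattern once reversed. A conceptual sketch, not Solr's code; the marker character and strings are illustrative.

public class ReversedWildcardSketch {
    public static void main(String[] args) {
        char marker = '\u0001'; // illustrative; the real char comes from the filter factory
        String token = "apple";
        // What the filter would index alongside "apple": "\u0001elppa"
        String reversedToken = marker + new StringBuilder(token).reverse().toString();
        // "*ple" has no fixed prefix, so it would visit most of the term dictionary.
        // Reversed (marker appended first, as in the automaton above), it becomes
        // the prefix pattern "\u0001elp*".
        String reversedPrefix = marker + "elp";
        System.out.println(reversedToken.startsWith(reversedPrefix)); // true
    }
}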
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class IntervalFacets, method getCountMultiValuedNumeric:
private void getCountMultiValuedNumeric() throws IOException {
final FieldType ft = schemaField.getType();
final String fieldName = schemaField.getName();
if (ft.getNumberType() == null) {
throw new IllegalStateException();
}
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
SortedNumericDocValues longs = null;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
longs = DocValues.getSortedNumeric(ctx.reader(), fieldName);
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
accumIntervalWithMultipleValues(longs);
}
}
}
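The docID()/advance() dance is the standard Lucene 7 DocValues iterator protocol: a DocValues instance is a forward-only per-segment iterator, so the loop re-acquires it whenever the document crosses a segment boundary and advances it at most once per candidate document. A minimal sketch of the same protocol, assuming a LeafReader and field name supplied by the caller; advanceExact is the newer equivalent of the explicit docID comparison.

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;

public class DocValuesIterationSketch {
    // Sums one document's values for a multi-valued numeric field.
    static long sumValues(LeafReader reader, String field, int docId) throws IOException {
        SortedNumericDocValues dv = DocValues.getSortedNumeric(reader, field);
        long sum = 0;
        // true only if the document actually has values for this field
        if (dv.advanceExact(docId)) {
            for (int i = 0; i < dv.docValueCount(); i++) {
                sum += dv.nextValue(); // values arrive in sorted order, duplicates included
            }
        }
        return sum;
    }
}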
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class NumericFacets, method getCountsMultiValued:
private static NamedList<Integer> getCountsMultiValued(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort) throws IOException {
// With facet.mincount=0 and PointFields, the only option would be to read the values
// from DocValues, which is not currently supported. See SOLR-10033
mincount = Math.max(mincount, 1);
final SchemaField sf = searcher.getSchema().getField(fieldName);
final FieldType ft = sf.getType();
assert sf.multiValued();
final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
// 1. accumulate
final HashTable hashTable = new HashTable(false);
final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
LeafReaderContext ctx = null;
SortedNumericDocValues longs = null;
int missingCount = 0;
for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
final int doc = docsIt.nextDoc();
if (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc()) {
do {
ctx = ctxIt.next();
} while (ctx == null || doc >= ctx.docBase + ctx.reader().maxDoc());
assert doc >= ctx.docBase;
longs = DocValues.getSortedNumeric(ctx.reader(), fieldName);
}
int valuesDocID = longs.docID();
if (valuesDocID < doc - ctx.docBase) {
valuesDocID = longs.advance(doc - ctx.docBase);
}
if (valuesDocID == doc - ctx.docBase) {
// This document must have at least one value
long l = longs.nextValue();
hashTable.add(l, 1);
for (int i = 1; i < longs.docValueCount(); i++) {
long lnew = longs.nextValue();
// Skip the value if it's equal to the last one; we don't want to double-count it.
if (lnew > l) {
hashTable.add(lnew, 1);
}
l = lnew;
}
} else {
++missingCount;
}
}
// 2. select top-k facet values
final int pqSize = limit < 0 ? hashTable.size : Math.min(offset + limit, hashTable.size);
final PriorityQueue<Entry> pq;
if (FacetParams.FACET_SORT_COUNT.equals(sort) || FacetParams.FACET_SORT_COUNT_LEGACY.equals(sort)) {
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.count < b.count || (a.count == b.count && a.bits > b.bits);
}
};
} else {
// sort=index
pq = new PriorityQueue<Entry>(pqSize) {
@Override
protected boolean lessThan(Entry a, Entry b) {
return a.bits > b.bits;
}
};
}
Entry e = null;
for (int i = 0; i < hashTable.bits.length; ++i) {
if (hashTable.counts[i] >= mincount) {
if (e == null) {
e = new Entry();
}
e.bits = hashTable.bits[i];
e.count = hashTable.counts[i];
e = pq.insertWithOverflow(e);
}
}
// 3. build the NamedList
final NamedList<Integer> result = new NamedList<>(Math.max(pq.size() - offset + 1, 1));
final Deque<Entry> counts = new ArrayDeque<>(pq.size() - offset);
while (pq.size() > offset) {
counts.addFirst(pq.pop());
}
for (Entry entry : counts) {
// TODO: convert to correct value
result.add(bitsToStringValue(ft, entry.bits), entry.count);
}
if (missing) {
result.add(null, missingCount);
}
return result;
}
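Two details above are worth flagging: the comparator breaks count ties on bits so results are deterministic, and insertWithOverflow returns the displaced (or rejected) Entry, letting a single spare object be recycled instead of allocating one per hash-table slot. A self-contained sketch of that recycling pattern with Lucene's PriorityQueue; Counter is a made-up stand-in for NumericFacets.Entry.

import org.apache.lucene.util.PriorityQueue;

public class TopKRecyclingSketch {
    static class Counter { long bits; int count; }

    public static void main(String[] args) {
        // Keep the top 3 by count; ties broken toward smaller bits, as above.
        PriorityQueue<Counter> pq = new PriorityQueue<Counter>(3) {
            @Override
            protected boolean lessThan(Counter a, Counter b) {
                return a.count < b.count || (a.count == b.count && a.bits > b.bits);
            }
        };
        int[] counts = {5, 1, 9, 7, 3};
        Counter spare = null;
        for (int i = 0; i < counts.length; i++) {
            if (spare == null) {
                spare = new Counter();
            }
            spare.bits = i;
            spare.count = counts[i];
            // insertWithOverflow hands back the evicted (or rejected) element,
            // which becomes the spare for the next round: one allocation per slot.
            spare = pq.insertWithOverflow(spare);
        }
        while (pq.size() > 0) {
            Counter c = pq.pop();
            System.out.println(c.bits + " -> " + c.count);
        }
    }
}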