use of io.zulia.message.ZuliaBase.Similarity in project zuliasearch by zuliaio.
the class QueryController method get.
/**
 * HTTP GET query endpoint: translates the query-string parameters into a {@code QueryRequest},
 * executes it through the node's {@code ZuliaIndexManager} and renders the result.
 *
 * <p>Parameter formats handled here:
 * <ul>
 * <li>{@code cursor}: any non-null value enables cursor output; a value other than {@code "0"} is
 * decoded via {@code CursorHelper} and used as the last result. A cursor requires {@code sort}.</li>
 * <li>{@code defaultOperator}: {@code AND} or {@code OR} (case-insensitive).</li>
 * <li>{@code similarity}: entries of the form {@code field:simType}, where simType is one of
 * {@code bm25}, {@code constant}, {@code tf}, {@code tfidf} (case-insensitive).</li>
 * <li>{@code fields}: a leading {@code -} marks the field as masked; otherwise it is a returned field.</li>
 * <li>{@code facet}: {@code label} or {@code label:maxFacets}.</li>
 * <li>{@code drillDowns}: {@code field:value}; entries without a {@code :} are ignored.</li>
 * <li>{@code sort}: {@code field} or {@code field:dir}, where dir is {@code -1}/{@code DESC} or
 * {@code 1}/{@code ASC}.</li>
 * <li>{@code queryJsonList}, {@code highlightJsonList}, {@code analyzeJsonList}: protobuf JSON merged
 * into the corresponding request builders.</li>
 * <li>{@code format}: {@code "json"} yields a JSON body; any other value yields CSV text, which needs
 * either {@code fields} or ({@code facet} with {@code rows == 0}). With {@code batch} the CSV is
 * streamed, paging with {@code batchSize} until a query returns no results.</li>
 * </ul>
 *
 * @return the rendered response; on invalid input or failure, a response whose status is
 *         {@code ZuliaConstants.INTERNAL_ERROR} and whose body describes the problem
 */
@Get
@Produces({ MediaType.APPLICATION_JSON + ";charset=utf-8", MediaType.TEXT_PLAIN + ";charset=utf-8" })
public HttpResponse<?> get(@QueryValue(ZuliaConstants.INDEX) List<String> indexName,
        @QueryValue(value = ZuliaConstants.QUERY, defaultValue = "*:*") String query,
        @Nullable @QueryValue(ZuliaConstants.QUERY_FIELD) List<String> queryFields,
        @Nullable @QueryValue(ZuliaConstants.FILTER_QUERY) List<String> filterQueries,
        @Nullable @QueryValue(ZuliaConstants.QUERY_JSON) List<String> queryJsonList,
        @Nullable @QueryValue(ZuliaConstants.FIELDS) List<String> fields,
        @QueryValue(value = ZuliaConstants.FETCH, defaultValue = "true") Boolean fetch,
        @QueryValue(value = ZuliaConstants.ROWS, defaultValue = "0") Integer rows,
        @Nullable @QueryValue(ZuliaConstants.FACET) List<String> facet,
        @Nullable @QueryValue(ZuliaConstants.DRILL_DOWN) List<String> drillDowns,
        @Nullable @QueryValue(ZuliaConstants.DEFAULT_OP) String defaultOperator,
        @Nullable @QueryValue(ZuliaConstants.SORT) List<String> sort,
        @QueryValue(value = ZuliaConstants.PRETTY, defaultValue = "true") Boolean pretty,
        @Nullable @QueryValue(value = ZuliaConstants.DISMAX, defaultValue = "false") Boolean dismax,
        @Nullable @QueryValue(ZuliaConstants.DISMAX_TIE) Float dismaxTie,
        @Nullable @QueryValue(ZuliaConstants.MIN_MATCH) Integer mm,
        @Nullable @QueryValue(ZuliaConstants.SIMILARITY) List<String> similarity,
        @QueryValue(value = ZuliaConstants.DEBUG, defaultValue = "false") Boolean debug,
        @QueryValue(value = ZuliaConstants.DONT_CACHE, defaultValue = "true") Boolean dontCache,
        @Nullable @QueryValue(ZuliaConstants.START) Integer start,
        @Nullable @QueryValue(ZuliaConstants.HIGHLIGHT) List<String> highlightList,
        @Nullable @QueryValue(ZuliaConstants.HIGHLIGHT_JSON) List<String> highlightJsonList,
        @Nullable @QueryValue(ZuliaConstants.ANALYZE_JSON) List<String> analyzeJsonList,
        @QueryValue(value = ZuliaConstants.FORMAT, defaultValue = "json") String format,
        @QueryValue(value = ZuliaConstants.BATCH, defaultValue = "false") Boolean batch,
        @QueryValue(value = ZuliaConstants.BATCH_SIZE, defaultValue = "500") Integer batchSize,
        @Nullable @QueryValue(ZuliaConstants.CURSOR) String cursor,
        @QueryValue(value = ZuliaConstants.TRUNCATE, defaultValue = "false") Boolean truncate) {
    ZuliaIndexManager indexManager = ZuliaNodeProvider.getZuliaNode().getIndexManager();
    QueryRequest.Builder qrBuilder = QueryRequest.newBuilder().addAllIndex(indexName);
    try {
        // ---- cursor handling ----
        boolean outputCursor = false;
        if (cursor != null) {
            // "0" means "start from the beginning": there is no previous result to resume from.
            if (!cursor.equals("0")) {
                qrBuilder.setLastResult(CursorHelper.getLastResultFromCursor(cursor));
            }
            outputCursor = true;
            if (sort == null || sort.isEmpty()) {
                return HttpResponse.created("Sort on unique value or value combination is required to use a cursor (i.e. id or title,id)").status(ZuliaConstants.INTERNAL_ERROR);
            }
        }

        // ---- simple request flags ----
        if (debug != null) {
            qrBuilder.setDebug(debug);
        }
        if (start != null) {
            qrBuilder.setStart(start);
        }
        if (dontCache != null) {
            qrBuilder.setDontCache(dontCache);
        }

        // ---- main scored query ----
        Query.Builder mainQueryBuilder = Query.newBuilder();
        if (query != null) {
            mainQueryBuilder.setQ(query);
        }
        if (mm != null) {
            mainQueryBuilder.setMm(mm);
        }
        if (dismax != null) {
            mainQueryBuilder.setDismax(dismax);
            if (dismaxTie != null) {
                mainQueryBuilder.setDismaxTie(dismaxTie);
            }
        }
        if (queryFields != null) {
            mainQueryBuilder.addAllQf(queryFields);
        }
        if (defaultOperator != null) {
            if (defaultOperator.equalsIgnoreCase("AND")) {
                mainQueryBuilder.setDefaultOp(Query.Operator.AND);
            } else if (defaultOperator.equalsIgnoreCase("OR")) {
                mainQueryBuilder.setDefaultOp(Query.Operator.OR);
            } else {
                // The error response must be returned; building it without returning silently ignores the bad value.
                return HttpResponse.created("Invalid default operator <" + defaultOperator + ">").status(ZuliaConstants.INTERNAL_ERROR);
            }
        }
        mainQueryBuilder.setQueryType(Query.QueryType.SCORE_MUST);
        qrBuilder.addQuery(mainQueryBuilder);

        // ---- per-field similarity overrides (field:simType) ----
        if (similarity != null) {
            for (String sim : similarity) {
                int separator = sim.indexOf(":");
                if (separator < 0) {
                    return HttpResponse.created("Similarity <" + sim + "> should be in the form field:simType").status(ZuliaConstants.INTERNAL_ERROR);
                }
                String field = sim.substring(0, separator);
                String simType = sim.substring(separator + 1);
                FieldSimilarity.Builder fieldSimilarity = FieldSimilarity.newBuilder();
                fieldSimilarity.setField(field);
                if (simType.equalsIgnoreCase("bm25")) {
                    fieldSimilarity.setSimilarity(Similarity.BM25);
                } else if (simType.equalsIgnoreCase("constant")) {
                    fieldSimilarity.setSimilarity(Similarity.CONSTANT);
                } else if (simType.equalsIgnoreCase("tf")) {
                    fieldSimilarity.setSimilarity(Similarity.TF);
                } else if (simType.equalsIgnoreCase("tfidf")) {
                    fieldSimilarity.setSimilarity(Similarity.TFIDF);
                } else {
                    // Reject unknown types instead of registering a similarity entry with no type set.
                    return HttpResponse.created("Unknown similarity type <" + simType + ">").status(ZuliaConstants.INTERNAL_ERROR);
                }
                qrBuilder.addFieldSimilarity(fieldSimilarity);
            }
        }

        // ---- additional queries ----
        if (filterQueries != null) {
            for (String filterQuery : filterQueries) {
                Query filter = Query.newBuilder().setQ(filterQuery).setQueryType(Query.QueryType.FILTER).build();
                qrBuilder.addQuery(filter);
            }
        }
        if (queryJsonList != null) {
            for (String queryJson : queryJsonList) {
                try {
                    Query.Builder subQueryBuilder = Query.newBuilder();
                    JsonFormat.parser().merge(queryJson, subQueryBuilder);
                    qrBuilder.addQuery(subQueryBuilder);
                } catch (InvalidProtocolBufferException e) {
                    return HttpResponse.created("Failed to parse query json: " + e.getClass().getSimpleName() + ":" + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
                }
            }
        }

        // ---- highlighting and analysis ----
        if (highlightList != null) {
            for (String hl : highlightList) {
                HighlightRequest highlightRequest = HighlightRequest.newBuilder().setField(hl).build();
                qrBuilder.addHighlightRequest(highlightRequest);
            }
        }
        if (highlightJsonList != null) {
            for (String hlJson : highlightJsonList) {
                try {
                    HighlightRequest.Builder hlBuilder = HighlightRequest.newBuilder();
                    JsonFormat.parser().merge(hlJson, hlBuilder);
                    qrBuilder.addHighlightRequest(hlBuilder);
                } catch (InvalidProtocolBufferException e) {
                    return HttpResponse.created("Failed to parse highlight json: " + e.getClass().getSimpleName() + ":" + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
                }
            }
        }
        if (analyzeJsonList != null) {
            for (String alJson : analyzeJsonList) {
                try {
                    AnalysisRequest.Builder analyzeRequestBuilder = AnalysisRequest.newBuilder();
                    JsonFormat.parser().merge(alJson, analyzeRequestBuilder);
                    qrBuilder.addAnalysisRequest(analyzeRequestBuilder);
                } catch (InvalidProtocolBufferException e) {
                    return HttpResponse.created("Failed to parse analyzer json: " + e.getClass().getSimpleName() + ":" + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
                }
            }
        }

        // ---- returned / masked document fields ----
        if (fields != null) {
            for (String field : fields) {
                if (field.startsWith("-")) {
                    qrBuilder.addDocumentMaskedFields(field.substring(1));
                } else {
                    qrBuilder.addDocumentFields(field);
                }
            }
        }

        qrBuilder.setResultFetchType(FetchType.FULL);
        if (fetch != null && !fetch) {
            qrBuilder.setResultFetchType(FetchType.NONE);
        }

        // ---- facets and drill downs ----
        FacetRequest.Builder frBuilder = FacetRequest.newBuilder();
        if (facet != null) {
            for (String f : facet) {
                Integer count = null;
                if (f.contains(":")) {
                    String countString = f.substring(f.indexOf(":") + 1);
                    f = f.substring(0, f.indexOf(":"));
                    try {
                        count = Integer.parseInt(countString);
                    } catch (Exception e) {
                        return HttpResponse.created("Invalid facet count <" + countString + "> for facet <" + f + ">").status(ZuliaConstants.INTERNAL_ERROR);
                    }
                }
                CountRequest.Builder countBuilder = CountRequest.newBuilder();
                Facet zuliaFacet = Facet.newBuilder().setLabel(f).build();
                CountRequest.Builder facetBuilder = countBuilder.setFacetField(zuliaFacet);
                if (count != null) {
                    facetBuilder.setMaxFacets(count);
                }
                frBuilder.addCountRequest(facetBuilder);
            }
        }
        if (drillDowns != null) {
            for (String drillDown : drillDowns) {
                // Entries without a ':' separator are skipped.
                if (drillDown.contains(":")) {
                    String value = drillDown.substring(drillDown.indexOf(":") + 1);
                    String field = drillDown.substring(0, drillDown.indexOf(":"));
                    frBuilder.addDrillDown(Facet.newBuilder().setLabel(field).setValue(value));
                }
            }
        }
        qrBuilder.setFacetRequest(frBuilder);

        // ---- sorting ----
        if (sort != null) {
            SortRequest.Builder sortRequest = SortRequest.newBuilder();
            for (String sortField : sort) {
                FieldSort.Builder fieldSort = FieldSort.newBuilder();
                if (sortField.contains(":")) {
                    String sortDir = sortField.substring(sortField.indexOf(":") + 1);
                    sortField = sortField.substring(0, sortField.indexOf(":"));
                    if ("-1".equals(sortDir) || "DESC".equalsIgnoreCase(sortDir)) {
                        fieldSort.setDirection(FieldSort.Direction.DESCENDING);
                    } else if ("1".equals(sortDir) || "ASC".equalsIgnoreCase(sortDir)) {
                        fieldSort.setDirection(FieldSort.Direction.ASCENDING);
                    } else {
                        return HttpResponse.created("Invalid sort direction <" + sortDir + "> for field <" + sortField + ">. Expecting -1/1 or DESC/ASC").status(ZuliaConstants.INTERNAL_ERROR);
                    }
                }
                fieldSort.setSortField(sortField);
                sortRequest.addFieldSort(fieldSort);
            }
            qrBuilder.setSortRequest(sortRequest);
        }

        qrBuilder.setAmount(rows);

        // ---- execute and render ----
        if (format.equals("json")) {
            QueryResponse qr = indexManager.query(qrBuilder.build());
            String response = getStandardResponse(qr, !pretty, outputCursor, truncate);
            if (pretty) {
                response = JsonWriter.formatJson(response);
            }
            return HttpResponse.ok(response).status(ZuliaConstants.SUCCESS).contentType(MediaType.APPLICATION_JSON_TYPE);
        }

        if (fields != null && !fields.isEmpty()) {
            if (batch) {
                qrBuilder.setAmount(batchSize);
                Writable writable = output -> {
                    try {
                        QueryResponse qr = indexManager.query(qrBuilder.build());
                        String header = buildHeaderForCSV(fields);
                        output.write(header);
                        output.flush();
                        int count = 0;
                        // Page through the results: each round resumes after the previous round's last result.
                        while (qr.getResultsList().size() > 0) {
                            for (ScoredResult scoredResult : qr.getResultsList()) {
                                Document doc = ResultHelper.getDocumentFromScoredResult(scoredResult);
                                appendDocument(fields, null, output, doc);
                                count++;
                                if (count % 1000 == 0) {
                                    LOG.info("Docs processed so far: " + count);
                                }
                            }
                            qrBuilder.setLastResult(qr.getLastResult());
                            qr = indexManager.query(qrBuilder.build());
                        }
                    } catch (Exception e) {
                        // The response is already being streamed, so the failure can only be logged here.
                        LOG.log(Level.SEVERE, e.getMessage(), e);
                    }
                };
                LocalDateTime now = LocalDateTime.now();
                DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd-H-mm-ss");
                return HttpResponse.ok(writable).status(ZuliaConstants.SUCCESS).header("content-disposition", "attachment; filename = " + "zuliaDownload_" + now.format(formatter) + ".csv").contentType(MediaType.APPLICATION_OCTET_STREAM);
            } else {
                QueryResponse qr = indexManager.query(qrBuilder.build());
                String response = getCSVDocumentResponse(fields, qr);
                return HttpResponse.ok(response).status(ZuliaConstants.SUCCESS).contentType(MediaType.TEXT_PLAIN + ";charset=utf-8");
            }
        } else if (facet != null && !facet.isEmpty() && rows == 0) {
            QueryResponse qr = indexManager.query(qrBuilder.build());
            StringBuilder response = new StringBuilder();
            response.append("facetName,facetKey,facetValue\n");
            for (FacetGroup facetGroup : qr.getFacetGroupList()) {
                for (FacetCount facetCount : facetGroup.getFacetCountList()) {
                    response.append(facetGroup.getCountRequest().getFacetField().getLabel());
                    response.append(",");
                    response.append("\"").append(facetCount.getFacet()).append("\"");
                    response.append(",");
                    response.append(Long.valueOf(facetCount.getCount()));
                    response.append("\n");
                }
            }
            return HttpResponse.ok(response.toString()).status(ZuliaConstants.SUCCESS).contentType(MediaType.TEXT_PLAIN + ";charset=utf-8");
        } else {
            return HttpResponse.ok("Please specify fields to be exported i.e. fl=title&fl=abstract or the facets to be exported i.e. facet=issn&facet=pubYear&rows=0").status(ZuliaConstants.SUCCESS).contentType(MediaType.TEXT_PLAIN + ";charset=utf-8");
        }
    } catch (Exception e) {
        LOG.log(Level.SEVERE, e.getMessage(), e);
        return HttpResponse.serverError(e.getClass().getSimpleName() + ":" + e.getMessage()).status(ZuliaConstants.INTERNAL_ERROR);
    }
}
use of io.zulia.message.ZuliaBase.Similarity in project zuliasearch by zuliaio.
the class ZuliaIndex method internalQuery.
/**
 * Runs {@code query} against the shards of this index that the request's routing selects and
 * collects the per-shard responses.
 *
 * <p>Steps:
 * <ol>
 * <li>Resolve the shards from every {@code IndexRouting} entry whose index name equals this index.</li>
 * <li>Compute the per-shard amount as {@code amount + start}; when the index has more than one shard,
 * full fetch is not requested and the amount is positive, it becomes
 * {@code (amount / numberOfShards + minShardRequest) * requestFactor}.</li>
 * <li>Rebuild, per shard, the {@code FieldDoc} to continue after from the request's last result,
 * converting each stored sort value according to the sort field's configured type.</li>
 * <li>Build the field-to-similarity map from the request, adding {@code Similarity.CONSTANT} for the
 * generated superbit fields of every query field of a {@code VECTOR} query.</li>
 * <li>Submit one task per shard to {@code shardPool} and wait for all of them.</li>
 * </ol>
 *
 * @param query                the query handed to each shard
 * @param internalQueryRequest wrapper providing the {@code QueryRequest} and the index routing list
 * @return the shard responses for this index, tagged with the index name
 * @throws Exception if a sort field is not sortable or has an unsupported numeric type, or if a shard
 *                   task fails (the task's own exception is rethrown when it is an {@code Exception})
 */
public IndexShardResponse internalQuery(Query query, final InternalQueryRequest internalQueryRequest) throws Exception {
    QueryRequest queryRequest = internalQueryRequest.getQueryRequest();

    Set<ZuliaShard> shardsForQuery = new HashSet<>();
    for (IndexRouting indexRouting : internalQueryRequest.getIndexRoutingList()) {
        if (indexRouting.getIndex().equals(indexName)) {
            List<ZuliaShard> shardsFromRouting = getShardsFromRouting(indexRouting, queryRequest.getMasterSlaveSettings());
            shardsForQuery.addAll(shardsFromRouting);
        }
    }

    int amount = queryRequest.getAmount() + queryRequest.getStart();
    if (indexConfig.getNumberOfShards() != 1) {
        if (!queryRequest.getFetchFull() && (amount > 0)) {
            amount = (int) (((amount / numberOfShards) + indexConfig.getIndexSettings().getMinShardRequest()) * indexConfig.getIndexSettings().getRequestFactor());
        }
    }
    final int requestedAmount = amount;

    // Shard number -> document to continue after, rebuilt from the previous page's last results.
    final HashMap<Integer, FieldDoc> lastScoreDocMap = new HashMap<>();
    ZuliaQuery.LastResult lr = queryRequest.getLastResult();
    for (ZuliaQuery.LastIndexResult lir : lr.getLastIndexResultList()) {
        if (indexName.equals(lir.getIndexName())) {
            for (ZuliaQuery.ScoredResult sr : lir.getLastForShardList()) {
                int luceneShardId = sr.getLuceneShardId();
                float score = sr.getScore();
                SortRequest sortRequest = queryRequest.getSortRequest();
                Object[] sortTerms = new Object[sortRequest.getFieldSortCount()];
                int sortTermsIndex = 0;
                ZuliaQuery.SortValues sortValues = sr.getSortValues();
                for (ZuliaQuery.FieldSort fs : sortRequest.getFieldSortList()) {
                    String sortField = fs.getSortField();
                    FieldConfig.FieldType sortType = indexConfig.getFieldTypeForSortField(sortField);
                    // A sort field that rewriteLengthFields changes is treated as a long regardless of its configured type.
                    if (!ZuliaParser.rewriteLengthFields(sortField).equals(sortField)) {
                        sortType = FieldConfig.FieldType.NUMERIC_LONG;
                    }
                    if (sortType == null) {
                        throw new Exception(sortField + " is not defined as a sortable field");
                    }
                    ZuliaQuery.SortValue sortValue = sortValues.getSortValue(sortTermsIndex);
                    if (ZuliaConstants.SCORE_FIELD.equals(sortField)) {
                        sortTerms[sortTermsIndex] = sortValue.getFloatValue();
                    } else if (sortValue.getExists()) {
                        if (FieldTypeUtil.isNumericOrDateFieldType(sortType)) {
                            if (FieldTypeUtil.isNumericIntFieldType(sortType)) {
                                sortTerms[sortTermsIndex] = sortValue.getIntegerValue();
                            } else if (FieldTypeUtil.isNumericLongFieldType(sortType)) {
                                sortTerms[sortTermsIndex] = sortValue.getLongValue();
                            } else if (FieldTypeUtil.isNumericFloatFieldType(sortType)) {
                                sortTerms[sortTermsIndex] = sortValue.getFloatValue();
                            } else if (FieldTypeUtil.isNumericDoubleFieldType(sortType)) {
                                sortTerms[sortTermsIndex] = sortValue.getDoubleValue();
                            } else if (FieldTypeUtil.isDateFieldType(sortType)) {
                                sortTerms[sortTermsIndex] = sortValue.getDateValue();
                            } else {
                                throw new Exception("Invalid numeric sort type <" + sortType + "> for sort field <" + sortField + ">");
                            }
                        } else {
                            // string
                            sortTerms[sortTermsIndex] = new BytesRef(sortValue.getStringValue());
                        }
                    } else {
                        // No value recorded for this sort field on the last document.
                        sortTerms[sortTermsIndex] = null;
                    }
                    sortTermsIndex++;
                }
                FieldDoc after = new FieldDoc(luceneShardId, score, sortTerms, sr.getShard());
                lastScoreDocMap.put(sr.getShard(), after);
            }
        }
    }

    Map<String, Similarity> fieldSimilarityMap = new HashMap<>();
    for (FieldSimilarity fieldSimilarity : queryRequest.getFieldSimilarityList()) {
        fieldSimilarityMap.put(fieldSimilarity.getField(), fieldSimilarity.getSimilarity());
    }
    for (ZuliaQuery.Query cosineSimQuery : queryRequest.getQueryList()) {
        if (cosineSimQuery.getQueryType() == ZuliaQuery.Query.QueryType.VECTOR) {
            for (String field : cosineSimQuery.getQfList()) {
                // NOTE(review): assumes every qf of a VECTOR query has a superbit config; confirm
                // getSuperBitConfigForField cannot return null for an unconfigured field.
                io.zulia.message.ZuliaIndex.Superbit superbitConfig = indexConfig.getSuperBitConfigForField(field);
                int sigLength = superbitConfig.getInputDim() * superbitConfig.getBatches();
                for (int i = 0; i < sigLength; i++) {
                    String fieldName = ZuliaConstants.SUPERBIT_PREFIX + "." + field + "." + i;
                    fieldSimilarityMap.put(fieldName, Similarity.CONSTANT);
                }
            }
        }
    }

    IndexShardResponse.Builder builder = IndexShardResponse.newBuilder();
    List<Future<ShardQueryResponse>> responses = new ArrayList<>();
    for (final ZuliaShard shard : shardsForQuery) {
        Future<ShardQueryResponse> response = shardPool.submit(() -> {
            QueryCacheKey queryCacheKey = null;
            if (!queryRequest.getDontCache()) {
                queryCacheKey = new QueryCacheKey(queryRequest);
            }
            return shard.queryShard(query, fieldSimilarityMap, requestedAmount, lastScoreDocMap.get(shard.getShardNumber()), queryRequest.getFacetRequest(), queryRequest.getSortRequest(), queryCacheKey, queryRequest.getResultFetchType(), queryRequest.getDocumentFieldsList(), queryRequest.getDocumentMaskedFieldsList(), queryRequest.getHighlightRequestList(), queryRequest.getAnalysisRequestList(), queryRequest.getDebug());
        });
        responses.add(response);
    }

    for (Future<ShardQueryResponse> response : responses) {
        try {
            ShardQueryResponse rs = response.get();
            builder.addShardQueryResponse(rs);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof OutOfMemoryError) {
                throw (OutOfMemoryError) cause;
            }
            if (cause instanceof Exception) {
                throw (Exception) cause;
            }
            // Any other Throwable cannot be cast to Exception; rethrow the wrapper, which keeps the
            // real cause attached instead of masking it with a ClassCastException.
            throw e;
        }
    }

    builder.setIndexName(indexName);
    return builder.build();
}
Aggregations