use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.
the class RegisterFieldsHandler method handle.
/**
 * Sets the FieldDef for every field specified in the FieldDefRequest, saves it in the
 * IndexState, and returns the JSON string representation of all registered fields.
 *
 * <p>The request is walked twice: pass 0 handles every non-virtual field and pass 1 handles the
 * virtual ones, so a virtual field is parsed only after the other fields of the same request are
 * already in the pending map (presumably so that a virtual field can refer to fields registered
 * in the same request).
 *
 * <p>Fields are selected per pass by their type rather than by "name already seen". This way the
 * duplicate-name check below applies to virtual fields as well; previously a virtual field whose
 * name was already used in the request was silently dropped instead of being rejected.
 *
 * @param indexState index state that receives the new field definitions; must not be null
 * @param fieldDefRequest request carrying the fields to register
 * @return response whose payload is {@code indexState.getAllFieldsJSON()} after registration
 * @throws RegisterFieldsException if a field name is not a simple name, ends with
 *     {@code _boost}, or is used more than once in the request
 * @throws RuntimeException if a field cannot be printed as JSON
 */
@Override
public FieldDefResponse handle(final IndexState indexState, FieldDefRequest fieldDefRequest) throws RegisterFieldsException {
  assert indexState != null;
  final Map<String, FieldDef> pendingFieldDefs = new HashMap<>();
  final Map<String, String> saveStates = new HashMap<>();
  final Set<String> seen = new HashSet<>();
  final List<Field> fields = fieldDefRequest.getFieldList();
  for (int pass = 0; pass < 2; pass++) {
    final boolean virtualPass = pass == 1;
    for (Field currentField : fields) {
      // Pass 0 takes the non-virtual fields, pass 1 the virtual ones.
      if (FieldType.VIRTUAL.equals(currentField.getType()) != virtualPass) {
        continue;
      }
      String fieldName = currentField.getName();
      if (!IndexState.isSimpleName(fieldName)) {
        throw new RegisterFieldsException("invalid field name \"" + fieldName + "\": must be [a-zA-Z_][a-zA-Z0-9]*");
      }
      if (fieldName.endsWith("_boost")) {
        throw new RegisterFieldsException("invalid field name \"" + fieldName + "\": field names cannot end with _boost");
      }
      // Set.add returns false when the name was already present in this request.
      if (!seen.add(fieldName)) {
        throw new RegisterFieldsException("field \"" + fieldName + "\" appears at least twice in this request");
      }
      try {
        // convert Proto object to Json String
        String currentFieldAsJsonString = JsonFormat.printer().print(currentField);
        saveStates.put(fieldName, currentFieldAsJsonString);
      } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException(e);
      }
      FieldDef fieldDef = parseOneFieldType(indexState, pendingFieldDefs, fieldName, currentField);
      if (fieldDef instanceof IdFieldDef) {
        verifyOnlyOneIdFieldExists(indexState, pendingFieldDefs, fieldDef);
      }
      pendingFieldDefs.put(fieldName, fieldDef);
      if (fieldDef instanceof IndexableFieldDef) {
        addChildFields((IndexableFieldDef) fieldDef, pendingFieldDefs);
      }
    }
  }
  // add FieldDef and its corresponding JsonObject to states variable in IndexState
  for (Map.Entry<String, FieldDef> ent : pendingFieldDefs.entrySet()) {
    if (IndexState.isChildName(ent.getKey())) {
      // child field will be present in top level field json
      indexState.addField(ent.getValue(), null);
    } else {
      JsonObject fieldAsJsonObject = jsonParser.parse(saveStates.get(ent.getKey())).getAsJsonObject();
      indexState.addField(ent.getValue(), fieldAsJsonObject);
    }
  }
  String response = indexState.getAllFieldsJSON();
  return FieldDefResponse.newBuilder().setResponse(response).build();
}
use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.
the class RegisterFieldsHandler method parseOneFieldType.
/**
 * Builds the {@link FieldDef} for one requested field.
 *
 * <p>Virtual fields are delegated to {@code parseOneVirtualFieldType}. Every other field is
 * created through {@code FieldDefCreator}; when the result is an {@link IndexableFieldDef} whose
 * facet type is neither {@code NO_FACETS} nor {@code NUMERIC_RANGE}, the dimension is also
 * configured in {@code indexState.facetsConfig}.
 *
 * @param indexState index state whose facets config is updated
 * @param pendingFieldDefs field definitions collected so far; passed on for virtual fields
 * @param fieldName name of the field being registered
 * @param currentField field description from the request
 * @return the created field definition
 * @throws RegisterFieldsException declared for the virtual field path
 */
private FieldDef parseOneFieldType(IndexState indexState, Map<String, FieldDef> pendingFieldDefs, String fieldName, Field currentField) throws RegisterFieldsException {
  final boolean isVirtual = FieldType.VIRTUAL.equals(currentField.getType());
  if (isVirtual) {
    return parseOneVirtualFieldType(indexState, pendingFieldDefs, fieldName, currentField);
  }

  final FieldDef created = FieldDefCreator.getInstance().createFieldDef(fieldName, currentField);
  if (created instanceof IndexableFieldDef) {
    final IndexableFieldDef indexable = (IndexableFieldDef) created;
    final IndexableFieldDef.FacetValueType facetKind = indexable.getFacetValueType();
    final boolean skipsFacetsConfig =
        facetKind == IndexableFieldDef.FacetValueType.NO_FACETS
            || facetKind == IndexableFieldDef.FacetValueType.NUMERIC_RANGE;
    if (!skipsFacetsConfig) {
      // Remaining kinds: hierarchy, flat or sortedSetDocValues.
      if (facetKind == IndexableFieldDef.FacetValueType.HIERARCHY) {
        indexState.facetsConfig.setHierarchical(fieldName, true);
      }
      if (indexable.isMultiValue()) {
        indexState.facetsConfig.setMultiValued(fieldName, true);
      }
      // Use the index field name from the request, or "$_<name>" when none was given.
      final String indexFieldName;
      if (currentField.getFacetIndexFieldName().isEmpty()) {
        indexFieldName = String.format("$_%s", currentField.getName());
      } else {
        indexFieldName = currentField.getFacetIndexFieldName();
      }
      indexState.facetsConfig.setIndexFieldName(fieldName, indexFieldName);
    }
  }
  // nocommit facetsConfig.setRequireDimCount
  logger.info("REGISTER: " + fieldName + " -> " + created);
  return created;
}
use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.
the class DrillSidewaysImpl method getFieldFacetResult.
/**
 * Computes the facet result for one facet request on a single field.
 *
 * <p>The strategy depends on the request and on the facet type of the field:
 *
 * <ul>
 *   <li>numeric ranges in the request: long ranges for int/long fields, double ranges for
 *       double and virtual fields; float fields are rejected
 *   <li>{@code SORTED_SET_DOC_VALUES}: sorted set counts, filtered when labels are given
 *   <li>{@code NO_FACETS} without numeric ranges: counts from the field doc values
 *   <li>any other facet type: taxonomy counts, reused per index field name when the dimension
 *       has no drill-sideways collector
 * </ul>
 *
 * @param drillDowns collector used when the dimension has no entry in {@code dsDimMap}
 * @param dsDimMap drill-sideways collectors keyed by dimension name
 * @param shardState shard state providing the index state, ords cache and SSDV state
 * @param facet facet request
 * @param dynamicFields field definitions the facet dimension is looked up in
 * @param searcherAndTaxonomyManager searcher and taxonomy reader for the current search
 * @param indexFieldNameToFacets taxonomy facets already computed, keyed by index field name;
 *     new entries are added here
 * @return facet result converted to its grpc form
 * @throws IllegalArgumentException if the field is unknown, is of an unsupported kind, or the
 *     request does not match the facet type the field was registered with
 * @throws IOException declared for the underlying facet counting
 */
private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(FacetsCollector drillDowns, Map<String, FacetsCollector> dsDimMap, ShardState shardState, Facet facet, Map<String, FieldDef> dynamicFields, SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomyManager, Map<String, Facets> indexFieldNameToFacets) throws IOException {
  final IndexState indexState = shardState.indexState;
  final String fieldName = facet.getDim();
  final FieldDef fieldDef = dynamicFields.get(fieldName);
  if (fieldDef == null) {
    throw new IllegalArgumentException(String.format("field %s was not registered and was not specified as a dynamic field ", fieldName));
  }
  final boolean indexable = fieldDef instanceof IndexableFieldDef;
  final boolean virtual = fieldDef instanceof VirtualFieldDef;
  if (!(indexable || virtual)) {
    throw new IllegalArgumentException(String.format("field %s is neither a virtual field nor registered as an indexable field. Facets are supported only for these types", fieldName));
  }

  final String dim = fieldDef.getName();
  final IndexableFieldDef.FacetValueType facetType = fieldDef.getFacetValueType();
  final List<NumericRangeType> requestedRanges = facet.getNumericRangeList();

  if (requestedRanges.isEmpty() && facetType == IndexableFieldDef.FacetValueType.NO_FACETS) {
    // No facet type is enabled on the field: fall back to the field doc values.
    if (!indexable) {
      throw new IllegalArgumentException("Doc values facet requires an indexable field : " + fieldName);
    }
    final IndexableFieldDef docValuesField = (IndexableFieldDef) fieldDef;
    if (!docValuesField.hasDocValues()) {
      throw new IllegalArgumentException("Doc values facet requires doc values enabled : " + fieldName);
    }
    return getDocValuesFacetResult(facet, drillDowns, docValuesField);
  }

  final FacetResult luceneResult;
  if (!requestedRanges.isEmpty()) {
    if (facetType != IndexableFieldDef.FacetValueType.NUMERIC_RANGE) {
      throw new IllegalArgumentException(String.format("field %s was not registered with facet=numericRange", dim));
    }
    if (fieldDef instanceof IntFieldDef || fieldDef instanceof LongFieldDef) {
      final LongRange[] longRanges = new LongRange[requestedRanges.size()];
      int slot = 0;
      for (NumericRangeType spec : requestedRanges) {
        longRanges[slot++] = new LongRange(spec.getLabel(), spec.getMin(), spec.getMinInclusive(), spec.getMax(), spec.getMaxInclusive());
      }
      final FacetsCollector sideways = dsDimMap.get(dim);
      final FacetsCollector collector = sideways != null ? sideways : drillDowns;
      final LongRangeFacetCounts counts = new LongRangeFacetCounts(dim, collector, longRanges);
      luceneResult = counts.getTopChildren(0, dim, facet.getPathsList().toArray(new String[facet.getPathsCount()]));
    } else if (fieldDef instanceof FloatFieldDef) {
      throw new IllegalArgumentException(String.format("field %s is of type float with FloatFieldDocValues which do not support numeric_range faceting", dim));
    } else if (fieldDef instanceof DoubleFieldDef || virtual) {
      final DoubleRange[] doubleRanges = new DoubleRange[requestedRanges.size()];
      int slot = 0;
      for (NumericRangeType spec : requestedRanges) {
        doubleRanges[slot++] = new DoubleRange(spec.getLabel(), spec.getMin(), spec.getMinInclusive(), spec.getMax(), spec.getMaxInclusive());
      }
      final FacetsCollector sideways = dsDimMap.get(dim);
      final FacetsCollector collector = sideways != null ? sideways : drillDowns;
      final DoubleRangeFacetCounts counts;
      if (virtual) {
        // Virtual fields are counted from their values source.
        counts = new DoubleRangeFacetCounts(dim, ((VirtualFieldDef) fieldDef).getValuesSource(), collector, doubleRanges);
      } else {
        counts = new DoubleRangeFacetCounts(dim, collector, doubleRanges);
      }
      luceneResult = counts.getTopChildren(0, dim, facet.getPathsList().toArray(new String[facet.getPathsCount()]));
    } else {
      throw new IllegalArgumentException("numericRanges must be provided only on field type numeric e.g. int, double, flat");
    }
  } else if (facetType == IndexableFieldDef.FacetValueType.SORTED_SET_DOC_VALUES) {
    final FacetsCollector sideways = dsDimMap.get(dim);
    final FacetsCollector collector = sideways != null ? sideways : drillDowns;
    if (facet.getLabelsCount() > 0) {
      // A label list was provided: count only those labels.
      final FilteredSSDVFacetCounts filtered = new FilteredSSDVFacetCounts(facet.getLabelsList(), dim, shardState.getSSDVState(searcherAndTaxonomyManager, fieldDef), collector);
      luceneResult = filtered.getTopChildren(facet.getTopN(), dim);
    } else {
      final SortedSetDocValuesFacetCounts unfiltered = new SortedSetDocValuesFacetCounts(shardState.getSSDVState(searcherAndTaxonomyManager, fieldDef), collector);
      luceneResult = unfiltered.getTopChildren(facet.getTopN(), dim);
    }
  } else {
    // Taxonomy facets
    if (facetType == IndexableFieldDef.FacetValueType.NUMERIC_RANGE) {
      throw new IllegalArgumentException(String.format("%s was registered with facet = numericRange; must pass numericRanges in the request", dim));
    }
    final String[] path = facet.getPathsList().toArray(new String[0]);
    final FacetsCollector sideways = dsDimMap.get(dim);
    final boolean useCachedOrds = facet.getUseOrdsCache();
    final String indexFieldName = indexState.facetsConfig.getDimConfig(dim).indexFieldName;

    // nocommit test both normal & ssdv facets in same index
    // A dimension used in drill-down is always counted from its own drill-sideways collector.
    // Otherwise counts already computed for this indexFieldName are reused when available.
    Facets taxonomyFacets = (sideways == null) ? indexFieldNameToFacets.get(indexFieldName) : null;
    if (taxonomyFacets == null) {
      final FacetsCollector collector = (sideways != null) ? sideways : drillDowns;
      if (useCachedOrds) {
        taxonomyFacets = new TaxonomyFacetCounts(shardState.getOrdsCache(indexFieldName), searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, collector);
      } else {
        taxonomyFacets = new FastTaxonomyFacetCounts(indexFieldName, searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, collector);
      }
      if (sideways == null) {
        indexFieldNameToFacets.put(indexFieldName, taxonomyFacets);
      }
    }

    if (facet.getTopN() != 0) {
      luceneResult = taxonomyFacets.getTopChildren(facet.getTopN(), dim, path);
    } else if (!facet.getLabelsList().isEmpty()) {
      final List<String> labels = facet.getLabelsList();
      final LabelAndValue[] labelValues = new LabelAndValue[labels.size()];
      int next = 0;
      for (String label : labels) {
        labelValues[next++] = new LabelAndValue(label, taxonomyFacets.getSpecificValue(dim, label));
      }
      luceneResult = new FacetResult(dim, path, -1, labelValues, -1);
    } else {
      throw new IllegalArgumentException("each facet request must have either topN or labels");
    }
  }
  return buildFacetResultGrpc(luceneResult, facet.getName());
}
use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.
the class FacetTopDocs method facetFromTopDocs.
/**
 * Builds a facet result by counting the doc values of the facet field over the top ranked hits.
 *
 * <p>At most {@code facet.getSampleTopDocs()} hits are inspected. Hits with no value for the
 * field are skipped and do not count towards the document total.
 *
 * @param topDocs ranked hits to sample from
 * @param facet facet request naming the field and the sample size
 * @param indexState index state used to resolve the field
 * @param searcher searcher whose reader leaves the hit doc ids are resolved against
 * @return facet result built from the per-value counts
 * @throws IllegalArgumentException if the field is not indexable or has no doc values
 * @throws IOException declared for loading doc values
 */
private static FacetResult facetFromTopDocs(TopDocs topDocs, Facet facet, IndexState indexState, IndexSearcher searcher) throws IOException {
  final FieldDef requestedField = indexState.getField(facet.getDim());
  if (!(requestedField instanceof IndexableFieldDef)) {
    throw new IllegalArgumentException("Sampling facet field must be indexable: " + facet.getDim());
  }
  final IndexableFieldDef sampledField = (IndexableFieldDef) requestedField;
  if (!sampledField.hasDocValues()) {
    throw new IllegalArgumentException("Sampling facet field must have doc values enabled: " + facet.getDim());
  }

  final Map<Object, Integer> valueCounts = new HashMap<>();
  int docsWithValues = 0;
  final int sampleSize = Math.min(topDocs.scoreDocs.length, facet.getSampleTopDocs());
  final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  for (int hit = 0; hit < sampleSize; hit++) {
    final int globalDocId = topDocs.scoreDocs[hit].doc;
    final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(globalDocId, leaves));
    // NOTE(review): doc values are loaded anew for every hit; presumably because hits are in
    // rank order rather than doc id order - confirm before caching them per leaf.
    final LoadedDocValues<?> hitValues = sampledField.getDocValues(leaf);
    // Doc values are addressed by the segment-local doc id.
    hitValues.setDocId(globalDocId - leaf.docBase);
    if (!hitValues.isEmpty()) {
      for (Object value : hitValues) {
        valueCounts.merge(value, 1, Integer::sum);
      }
      docsWithValues++;
    }
  }
  return DrillSidewaysImpl.buildFacetResultFromCountsGrpc(valueCounts, facet, docsWithValues);
}
use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.
the class SegmentDocLookup method get.
/**
 * Returns the {@link LoadedDocValues} of a field, positioned on the current document.
 *
 * <p>The doc values are taken from the loader cache when present; otherwise they are created
 * from the field definition and cached under the field name. In both cases the current document
 * id is set on the instance before it is returned.
 *
 * @param key field name
 * @return {@link LoadedDocValues} implementation for the given field
 * @throws IllegalArgumentException if the field does not exist in the index, if it does not
 *     support doc values, or if loading the doc values or setting the doc id fails
 * @throws NullPointerException if key is null
 */
@Override
public LoadedDocValues<?> get(Object key) {
  final String fieldName = Objects.requireNonNull(key).toString();
  LoadedDocValues<?> loaded = loaderCache.get(fieldName);
  if (loaded == null) {
    final FieldDef definition = indexState.getField(fieldName);
    if (!(definition instanceof IndexableFieldDef)) {
      // instanceof is false for null as well; pick the message matching the actual cause.
      final String reason = (definition == null) ? "Field does not exist: " : "Field cannot have doc values: ";
      throw new IllegalArgumentException(reason + fieldName);
    }
    try {
      loaded = ((IndexableFieldDef) definition).getDocValues(context);
    } catch (IOException e) {
      throw new IllegalArgumentException("Could not get doc values for field: " + fieldName, e);
    }
    loaderCache.put(fieldName, loaded);
  }
  try {
    loaded.setDocId(docId);
  } catch (IOException e) {
    throw new IllegalArgumentException("Could not set doc: " + docId + ", field: " + fieldName, e);
  }
  return loaded;
}
Aggregations