Search in sources :

Example 1 with IndexableFieldDef

use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.

the class RegisterFieldsHandler method handle.

/**
 * Sets the FieldDef for every field specified in the FieldDefRequest, saves it in the IndexState
 * and returns the String representation of all registered fields.
 *
 * <p>The request is walked twice: the first pass handles every non-virtual field, the second pass
 * handles the virtual fields (presumably so that any field a virtual field refers to has already
 * been parsed within the same request -- confirm against parseOneVirtualFieldType).
 *
 * <p>Every field name is validated and must be unique within the request, regardless of the
 * field type. Nothing is added to the IndexState field registry until all fields have been
 * parsed successfully.
 *
 * @param indexState index to register the fields on, must not be null
 * @param fieldDefRequest request holding the field definitions
 * @return response wrapping {@code indexState.getAllFieldsJSON()}
 * @throws RegisterFieldsException if a field name is invalid, ends with {@code _boost}, or is
 *     used by more than one field in the request
 */
@Override
public FieldDefResponse handle(final IndexState indexState, FieldDefRequest fieldDefRequest) throws RegisterFieldsException {
    assert indexState != null;
    final Map<String, FieldDef> pendingFieldDefs = new HashMap<>();
    final Map<String, String> saveStates = new HashMap<>();
    final Set<String> seen = new HashSet<>();
    final List<Field> fields = fieldDefRequest.getFieldList();
    for (int pass = 0; pass < 2; pass++) {
        final boolean virtualPass = pass == 1;
        for (Field currentField : fields) {
            // Select by type instead of by "already seen": pass 0 takes the non-virtual fields and
            // pass 1 the virtual ones. This way a repeated virtual field name reaches the duplicate
            // check below instead of being silently skipped.
            if (FieldType.VIRTUAL.equals(currentField.getType()) != virtualPass) {
                continue;
            }
            String fieldName = currentField.getName();
            if (!IndexState.isSimpleName(fieldName)) {
                throw new RegisterFieldsException("invalid field name \"" + fieldName + "\": must be [a-zA-Z_][a-zA-Z0-9]*");
            }
            if (fieldName.endsWith("_boost")) {
                throw new RegisterFieldsException("invalid field name \"" + fieldName + "\": field names cannot end with _boost");
            }
            // Set.add returns false when the name was already present
            if (!seen.add(fieldName)) {
                throw new RegisterFieldsException("field \"" + fieldName + "\" appears at least twice in this request");
            }
            try {
                // convert Proto object to Json String
                String currentFieldAsJsonString = JsonFormat.printer().print(currentField);
                saveStates.put(fieldName, currentFieldAsJsonString);
            } catch (InvalidProtocolBufferException e) {
                throw new RuntimeException(e);
            }
            FieldDef fieldDef = parseOneFieldType(indexState, pendingFieldDefs, fieldName, currentField);
            if (fieldDef instanceof IdFieldDef) {
                verifyOnlyOneIdFieldExists(indexState, pendingFieldDefs, fieldDef);
            }
            pendingFieldDefs.put(fieldName, fieldDef);
            if (fieldDef instanceof IndexableFieldDef) {
                addChildFields((IndexableFieldDef) fieldDef, pendingFieldDefs);
            }
        }
    }
    // add FieldDef and its corresponding JsonObject to states variable in IndexState
    for (Map.Entry<String, FieldDef> ent : pendingFieldDefs.entrySet()) {
        if (IndexState.isChildName(ent.getKey())) {
            // child field will be present in top level field json
            indexState.addField(ent.getValue(), null);
        } else {
            JsonObject fieldAsJsonObject = jsonParser.parse(saveStates.get(ent.getKey())).getAsJsonObject();
            indexState.addField(ent.getValue(), fieldAsJsonObject);
        }
    }
    String response = indexState.getAllFieldsJSON();
    return FieldDefResponse.newBuilder().setResponse(response).build();
}
Also used : IdFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IdFieldDef) HashMap(java.util.HashMap) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) JsonObject(com.google.gson.JsonObject) IdFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IdFieldDef) VirtualFieldDef(com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) FieldDef(com.yelp.nrtsearch.server.luceneserver.field.FieldDef) Field(com.yelp.nrtsearch.server.grpc.Field) FieldDefResponse(com.yelp.nrtsearch.server.grpc.FieldDefResponse) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 2 with IndexableFieldDef

use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.

the class RegisterFieldsHandler method parseOneFieldType.

/**
 * Builds the FieldDef for one requested field and, for facet-enabled indexable fields, records
 * the facet settings on {@code indexState.facetsConfig}.
 *
 * <p>Virtual fields are delegated to {@code parseOneVirtualFieldType}; every other type is
 * created through {@code FieldDefCreator}.
 *
 * @param indexState index state whose facets config is updated
 * @param pendingFieldDefs field definitions parsed so far in this request (only forwarded to the
 *     virtual field parser)
 * @param fieldName name of the field being registered
 * @param currentField field definition from the request
 * @return the created field definition
 * @throws RegisterFieldsException declared for the virtual field parsing path
 */
private FieldDef parseOneFieldType(IndexState indexState, Map<String, FieldDef> pendingFieldDefs, String fieldName, Field currentField) throws RegisterFieldsException {
    final boolean isVirtual = FieldType.VIRTUAL.equals(currentField.getType());
    if (isVirtual) {
        return parseOneVirtualFieldType(indexState, pendingFieldDefs, fieldName, currentField);
    }

    final FieldDef created = FieldDefCreator.getInstance().createFieldDef(fieldName, currentField);

    if (created instanceof IndexableFieldDef) {
        final IndexableFieldDef indexable = (IndexableFieldDef) created;
        final IndexableFieldDef.FacetValueType valueType = indexable.getFacetValueType();
        final boolean skipFacetsConfig =
            valueType == IndexableFieldDef.FacetValueType.NO_FACETS
                || valueType == IndexableFieldDef.FacetValueType.NUMERIC_RANGE;
        if (!skipFacetsConfig) {
            // remaining facet types: hierarchy, flat or sortedSetDocValues
            if (valueType == IndexableFieldDef.FacetValueType.HIERARCHY) {
                indexState.facetsConfig.setHierarchical(fieldName, true);
            }
            if (indexable.isMultiValue()) {
                indexState.facetsConfig.setMultiValued(fieldName, true);
            }
            // index field name for the facet: the explicitly requested one, or "$_<name>" when
            // the request leaves it empty
            final String facetFieldName;
            if (currentField.getFacetIndexFieldName().isEmpty()) {
                facetFieldName = String.format("$_%s", currentField.getName());
            } else {
                facetFieldName = currentField.getFacetIndexFieldName();
            }
            indexState.facetsConfig.setIndexFieldName(fieldName, facetFieldName);
        }
    }

    // nocommit facetsConfig.setRequireDimCount
    logger.info("REGISTER: " + fieldName + " -> " + created);
    return created;
}
Also used : IdFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IdFieldDef) VirtualFieldDef(com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) FieldDef(com.yelp.nrtsearch.server.luceneserver.field.FieldDef) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef)

Example 3 with IndexableFieldDef

use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.

the class DrillSidewaysImpl method getFieldFacetResult.

/**
 * Computes the result for a single facet request on a field, picking the counting strategy from
 * the request contents and the field's facet value type:
 *
 * <ul>
 *   <li>request carries numeric ranges: long range counts for int/long fields, double range
 *       counts for double and virtual fields
 *   <li>field facet type is SORTED_SET_DOC_VALUES: sorted set doc values counts, optionally
 *       restricted to the labels given in the request
 *   <li>any other facet type except NO_FACETS: taxonomy based counts
 *   <li>NO_FACETS: delegates to {@code getDocValuesFacetResult} using the field doc values
 * </ul>
 *
 * @param drillDowns collector used whenever {@code dsDimMap} has no entry for the field
 * @param dsDimMap collectors keyed by field name; an entry takes precedence over {@code
 *     drillDowns}
 * @param shardState shard state, provides the index state, SSDV state and ords cache
 * @param facet facet request
 * @param dynamicFields map the field definition is looked up in by {@code facet.getDim()}
 * @param searcherAndTaxonomyManager source of the taxonomy reader and the SSDV state lookup
 * @param indexFieldNameToFacets taxonomy facets already computed, keyed by index field name; read
 *     and extended by this method when the field has no entry in {@code dsDimMap}
 * @return grpc facet result
 * @throws IllegalArgumentException if the field is unknown, is neither indexable nor virtual,
 *     does not match the kind of facet requested, or the request has neither topN nor labels for
 *     a taxonomy facet
 * @throws IOException declared by this method; which of the facet computations raise it is not
 *     visible here
 */
private static com.yelp.nrtsearch.server.grpc.FacetResult getFieldFacetResult(FacetsCollector drillDowns, Map<String, FacetsCollector> dsDimMap, ShardState shardState, Facet facet, Map<String, FieldDef> dynamicFields, SearcherTaxonomyManager.SearcherAndTaxonomy searcherAndTaxonomyManager, Map<String, Facets> indexFieldNameToFacets) throws IOException {
    IndexState indexState = shardState.indexState;
    String fieldName = facet.getDim();
    FieldDef fieldDef = dynamicFields.get(fieldName);
    if (fieldDef == null) {
        throw new IllegalArgumentException(String.format("field %s was not registered and was not specified as a dynamic field ", fieldName));
    }
    FacetResult facetResult;
    if (!(fieldDef instanceof IndexableFieldDef) && !(fieldDef instanceof VirtualFieldDef)) {
        throw new IllegalArgumentException(String.format("field %s is neither a virtual field nor registered as an indexable field. Facets are supported only for these types", fieldName));
    }
    if (!facet.getNumericRangeList().isEmpty()) {
        // Numeric range facets: the field must have been registered for them
        if (fieldDef.getFacetValueType() != IndexableFieldDef.FacetValueType.NUMERIC_RANGE) {
            throw new IllegalArgumentException(String.format("field %s was not registered with facet=numericRange", fieldDef.getName()));
        }
        if (fieldDef instanceof IntFieldDef || fieldDef instanceof LongFieldDef) {
            // int and long fields are counted with long ranges
            List<NumericRangeType> rangeList = facet.getNumericRangeList();
            LongRange[] ranges = new LongRange[rangeList.size()];
            for (int i = 0; i < ranges.length; i++) {
                NumericRangeType numericRangeType = rangeList.get(i);
                ranges[i] = new LongRange(numericRangeType.getLabel(), numericRangeType.getMin(), numericRangeType.getMinInclusive(), numericRangeType.getMax(), numericRangeType.getMaxInclusive());
            }
            // prefer the collector registered for this field, fall back to the drill down collector
            FacetsCollector c = dsDimMap.get(fieldDef.getName());
            if (c == null) {
                c = drillDowns;
            }
            LongRangeFacetCounts longRangeFacetCounts = new LongRangeFacetCounts(fieldDef.getName(), c, ranges);
            facetResult = longRangeFacetCounts.getTopChildren(0, fieldDef.getName(), facet.getPathsList().toArray(new String[facet.getPathsCount()]));
        } else if (fieldDef instanceof FloatFieldDef) {
            throw new IllegalArgumentException(String.format("field %s is of type float with FloatFieldDocValues which do not support numeric_range faceting", fieldDef.getName()));
        } else if (fieldDef instanceof DoubleFieldDef || fieldDef instanceof VirtualFieldDef) {
            // double and virtual fields are counted with double ranges
            List<NumericRangeType> rangeList = facet.getNumericRangeList();
            DoubleRange[] ranges = new DoubleRange[rangeList.size()];
            for (int i = 0; i < ranges.length; i++) {
                NumericRangeType numericRangeType = rangeList.get(i);
                ranges[i] = new DoubleRange(numericRangeType.getLabel(), numericRangeType.getMin(), numericRangeType.getMinInclusive(), numericRangeType.getMax(), numericRangeType.getMaxInclusive());
            }
            // prefer the collector registered for this field, fall back to the drill down collector
            FacetsCollector c = dsDimMap.get(fieldDef.getName());
            if (c == null) {
                c = drillDowns;
            }
            DoubleRangeFacetCounts doubleRangeFacetCounts;
            if (fieldDef instanceof VirtualFieldDef) {
                // virtual fields supply their own values source
                VirtualFieldDef virtualFieldDef = (VirtualFieldDef) fieldDef;
                doubleRangeFacetCounts = new DoubleRangeFacetCounts(virtualFieldDef.getName(), virtualFieldDef.getValuesSource(), c, ranges);
            } else {
                doubleRangeFacetCounts = new DoubleRangeFacetCounts(fieldDef.getName(), c, ranges);
            }
            facetResult = doubleRangeFacetCounts.getTopChildren(0, fieldDef.getName(), facet.getPathsList().toArray(new String[facet.getPathsCount()]));
        } else {
            throw new IllegalArgumentException(String.format("numericRanges must be provided only on field type numeric e.g. int, double, flat"));
        }
    } else if (fieldDef.getFacetValueType() == IndexableFieldDef.FacetValueType.SORTED_SET_DOC_VALUES) {
        // Sorted set doc values facets
        FacetsCollector c = dsDimMap.get(fieldDef.getName());
        if (c == null) {
            c = drillDowns;
        }
        if (facet.getLabelsCount() > 0) {
            // filter facet if a label list is provided
            FilteredSSDVFacetCounts filteredSSDVFacetCounts = new FilteredSSDVFacetCounts(facet.getLabelsList(), fieldDef.getName(), shardState.getSSDVState(searcherAndTaxonomyManager, fieldDef), c);
            facetResult = filteredSSDVFacetCounts.getTopChildren(facet.getTopN(), fieldDef.getName());
        } else {
            SortedSetDocValuesFacetCounts sortedSetDocValuesFacetCounts = new SortedSetDocValuesFacetCounts(shardState.getSSDVState(searcherAndTaxonomyManager, fieldDef), c);
            facetResult = sortedSetDocValuesFacetCounts.getTopChildren(facet.getTopN(), fieldDef.getName());
        }
    } else if (fieldDef.getFacetValueType() != IndexableFieldDef.FacetValueType.NO_FACETS) {
        // Taxonomy  facets
        if (fieldDef.getFacetValueType() == IndexableFieldDef.FacetValueType.NUMERIC_RANGE) {
            // reached only when the request carried no numeric ranges (first branch not taken)
            throw new IllegalArgumentException(String.format("%s was registered with facet = numericRange; must pass numericRanges in the request", fieldDef.getName()));
        }
        // copy the requested path (possibly empty) into an array
        String[] path;
        if (!facet.getPathsList().isEmpty()) {
            ProtocolStringList pathList = facet.getPathsList();
            path = new String[facet.getPathsList().size()];
            for (int idx = 0; idx < path.length; idx++) {
                path[idx] = pathList.get(idx);
            }
        } else {
            path = new String[0];
        }
        FacetsCollector c = dsDimMap.get(fieldDef.getName());
        boolean useCachedOrds = facet.getUseOrdsCache();
        Facets luceneFacets;
        if (c != null) {
            // This dimension was used in
            // drill-down; compute its facet counts from the
            // drill-sideways collector:
            String indexFieldName = indexState.facetsConfig.getDimConfig(fieldDef.getName()).indexFieldName;
            if (useCachedOrds) {
                luceneFacets = new TaxonomyFacetCounts(shardState.getOrdsCache(indexFieldName), searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, c);
            } else {
                luceneFacets = new FastTaxonomyFacetCounts(indexFieldName, searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, c);
            }
        } else {
            // nocommit test both normal & ssdv facets in same index
            // See if we already computed facet
            // counts for this indexFieldName:
            String indexFieldName = indexState.facetsConfig.getDimConfig(fieldDef.getName()).indexFieldName;
            Map<String, Facets> facetsMap = indexFieldNameToFacets;
            luceneFacets = facetsMap.get(indexFieldName);
            if (luceneFacets == null) {
                if (useCachedOrds) {
                    luceneFacets = new TaxonomyFacetCounts(shardState.getOrdsCache(indexFieldName), searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, drillDowns);
                } else {
                    luceneFacets = new FastTaxonomyFacetCounts(indexFieldName, searcherAndTaxonomyManager.taxonomyReader, indexState.facetsConfig, drillDowns);
                }
                // remember the counts so later facets on the same index field can reuse them
                facetsMap.put(indexFieldName, luceneFacets);
            }
        }
        if (facet.getTopN() != 0) {
            facetResult = luceneFacets.getTopChildren(facet.getTopN(), fieldDef.getName(), path);
        } else if (!facet.getLabelsList().isEmpty()) {
            // no topN given: look up the value of each requested label individually
            List<LabelAndValue> results = new ArrayList<LabelAndValue>();
            for (String label : facet.getLabelsList()) {
                results.add(new LabelAndValue(label, luceneFacets.getSpecificValue(fieldDef.getName(), label)));
            }
            facetResult = new FacetResult(fieldDef.getName(), path, -1, results.toArray(new LabelAndValue[results.size()]), -1);
        } else {
            throw new IllegalArgumentException(String.format("each facet request must have either topN or labels"));
        }
    } else {
        // if no facet type is enabled on the field, try using the field doc values
        if (!(fieldDef instanceof IndexableFieldDef)) {
            throw new IllegalArgumentException("Doc values facet requires an indexable field : " + fieldName);
        }
        IndexableFieldDef indexableFieldDef = (IndexableFieldDef) fieldDef;
        if (!indexableFieldDef.hasDocValues()) {
            throw new IllegalArgumentException("Doc values facet requires doc values enabled : " + fieldName);
        }
        return getDocValuesFacetResult(facet, drillDowns, indexableFieldDef);
    }
    return buildFacetResultGrpc(facetResult, facet.getName());
}
Also used : FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) Facets(org.apache.lucene.facet.Facets) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) LabelAndValue(org.apache.lucene.facet.LabelAndValue) IndexState(com.yelp.nrtsearch.server.luceneserver.IndexState) VirtualFieldDef(com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef) FastTaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts) TaxonomyFacetCounts(org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts) DoubleFieldDef(com.yelp.nrtsearch.server.luceneserver.field.DoubleFieldDef) LongRange(org.apache.lucene.facet.range.LongRange) LongRangeFacetCounts(org.apache.lucene.facet.range.LongRangeFacetCounts) IntFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IntFieldDef) ProtocolStringList(com.google.protobuf.ProtocolStringList) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) DoubleRangeFacetCounts(org.apache.lucene.facet.range.DoubleRangeFacetCounts) LongFieldDef(com.yelp.nrtsearch.server.luceneserver.field.LongFieldDef) NumericRangeType(com.yelp.nrtsearch.server.grpc.NumericRangeType) ProtocolStringList(com.google.protobuf.ProtocolStringList) FacetsCollector(org.apache.lucene.facet.FacetsCollector) FloatFieldDef(com.yelp.nrtsearch.server.luceneserver.field.FloatFieldDef) VirtualFieldDef(com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) FieldDef(com.yelp.nrtsearch.server.luceneserver.field.FieldDef) IntFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IntFieldDef) DoubleFieldDef(com.yelp.nrtsearch.server.luceneserver.field.DoubleFieldDef) LongFieldDef(com.yelp.nrtsearch.server.luceneserver.field.LongFieldDef) DoubleRange(org.apache.lucene.facet.range.DoubleRange) FloatFieldDef(com.yelp.nrtsearch.server.luceneserver.field.FloatFieldDef) 
SortedSetDocValuesFacetCounts(org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts) FacetResult(org.apache.lucene.facet.FacetResult)

Example 4 with IndexableFieldDef

use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.

the class FacetTopDocs method facetFromTopDocs.

/**
 * Builds a facet result by counting the doc values of the facet field over the leading hits of
 * the given top docs.
 *
 * <p>At most {@code facet.getSampleTopDocs()} hits are inspected. Hits without a value for the
 * field are ignored and do not contribute to the document total.
 *
 * @param topDocs hits to sample from
 * @param facet facet request, provides the field name and the sample size
 * @param indexState index state used to resolve the field definition
 * @param searcher searcher whose index reader leaves are used to locate each hit
 * @return facet result built from the collected value counts
 * @throws IllegalArgumentException if the field is not indexable or has no doc values enabled
 * @throws IOException declared by this method; presumably raised while loading doc values
 */
private static FacetResult facetFromTopDocs(TopDocs topDocs, Facet facet, IndexState indexState, IndexSearcher searcher) throws IOException {
    final FieldDef fieldDef = indexState.getField(facet.getDim());
    final boolean indexable = fieldDef instanceof IndexableFieldDef;
    if (!indexable) {
        throw new IllegalArgumentException("Sampling facet field must be indexable: " + facet.getDim());
    }
    final IndexableFieldDef indexableFieldDef = (IndexableFieldDef) fieldDef;
    if (!indexableFieldDef.hasDocValues()) {
        throw new IllegalArgumentException("Sampling facet field must have doc values enabled: " + facet.getDim());
    }

    final Map<Object, Integer> valueCounts = new HashMap<>();
    final int sampleSize = Math.min(topDocs.scoreDocs.length, facet.getSampleTopDocs());
    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();

    int docsWithValues = 0;
    for (int rank = 0; rank < sampleSize; rank++) {
        final int globalDocId = topDocs.scoreDocs[rank].doc;
        // find the segment holding this hit and address the doc relative to the segment base
        final LeafReaderContext leaf = leaves.get(ReaderUtil.subIndex(globalDocId, leaves));
        final LoadedDocValues<?> values = indexableFieldDef.getDocValues(leaf);
        values.setDocId(globalDocId - leaf.docBase);
        if (!values.isEmpty()) {
            for (Object value : values) {
                valueCounts.merge(value, 1, Integer::sum);
            }
            docsWithValues++;
        }
    }
    return DrillSidewaysImpl.buildFacetResultFromCountsGrpc(valueCounts, facet, docsWithValues);
}
Also used : IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) FieldDef(com.yelp.nrtsearch.server.luceneserver.field.FieldDef) HashMap(java.util.HashMap) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) LeafReaderContext(org.apache.lucene.index.LeafReaderContext)

Example 5 with IndexableFieldDef

use of com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef in project nrtsearch by Yelp.

the class SegmentDocLookup method get.

/**
 * Look up the {@link LoadedDocValues} of a field, positioned on the current document id. The
 * instance is taken from the loader cache when present, otherwise it is created from the field
 * definition and stored in the cache.
 *
 * @param key field name
 * @return {@link LoadedDocValues} implementation for the given field
 * @throws IllegalArgumentException if the field does not exist in the index, if the field does
 *     not support doc values, or if loading the doc values or setting the target doc id fails
 * @throws NullPointerException if key is null
 */
@Override
public LoadedDocValues<?> get(Object key) {
    Objects.requireNonNull(key);
    final String name = key.toString();

    LoadedDocValues<?> loaded = loaderCache.get(name);
    if (loaded == null) {
        // cache miss: resolve the field and load its doc values for this segment
        final FieldDef definition = indexState.getField(name);
        if (definition == null) {
            throw new IllegalArgumentException("Field does not exist: " + name);
        }
        if (definition instanceof IndexableFieldDef) {
            try {
                loaded = ((IndexableFieldDef) definition).getDocValues(context);
            } catch (IOException e) {
                throw new IllegalArgumentException("Could not get doc values for field: " + name, e);
            }
            loaderCache.put(name, loaded);
        } else {
            throw new IllegalArgumentException("Field cannot have doc values: " + name);
        }
    }

    // cached or fresh, the values are always re-positioned on the current document
    try {
        loaded.setDocId(docId);
    } catch (IOException e) {
        throw new IllegalArgumentException("Could not set doc: " + docId + ", field: " + name, e);
    }
    return loaded;
}
Also used : IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) FieldDef(com.yelp.nrtsearch.server.luceneserver.field.FieldDef) IndexableFieldDef(com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef) IOException(java.io.IOException)

Aggregations

FieldDef (com.yelp.nrtsearch.server.luceneserver.field.FieldDef)5 IndexableFieldDef (com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef)5 VirtualFieldDef (com.yelp.nrtsearch.server.luceneserver.field.VirtualFieldDef)3 IdFieldDef (com.yelp.nrtsearch.server.luceneserver.field.IdFieldDef)2 HashMap (java.util.HashMap)2 JsonObject (com.google.gson.JsonObject)1 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 ProtocolStringList (com.google.protobuf.ProtocolStringList)1 Field (com.yelp.nrtsearch.server.grpc.Field)1 FieldDefResponse (com.yelp.nrtsearch.server.grpc.FieldDefResponse)1 NumericRangeType (com.yelp.nrtsearch.server.grpc.NumericRangeType)1 IndexState (com.yelp.nrtsearch.server.luceneserver.IndexState)1 DoubleFieldDef (com.yelp.nrtsearch.server.luceneserver.field.DoubleFieldDef)1 FloatFieldDef (com.yelp.nrtsearch.server.luceneserver.field.FloatFieldDef)1 IntFieldDef (com.yelp.nrtsearch.server.luceneserver.field.IntFieldDef)1 LongFieldDef (com.yelp.nrtsearch.server.luceneserver.field.LongFieldDef)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1