Search in sources :

Example 1 with PropertyDefinition

use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.

the class FulltextDocumentMaker method indexFunctionRestrictions.

/**
 * Indexes the function-based restrictions of the current indexing rule.
 * <p>
 * For every property definition returned by {@code indexingRule.getFunctionRestrictions()}
 * the function value is calculated for the given node. Definitions whose calculated
 * value is {@code null} are skipped. For the others the typed fields are added, and
 * additionally the typed ordered fields when the definition is marked as ordered.
 *
 * @param path   path passed on to the value calculation
 * @param fields document to which the fields are added
 * @param state  node state passed on to the value calculation
 * @return {@code true} if fields were added for at least one function restriction
 */
private boolean indexFunctionRestrictions(String path, D fields, NodeState state) {
    boolean anyFieldAdded = false;
    for (PropertyDefinition definition : indexingRule.getFunctionRestrictions()) {
        PropertyState computed = calculateValue(path, state, definition.functionCode);
        if (computed == null) {
            // no value could be calculated for this function: nothing to index
            continue;
        }
        if (definition.ordered) {
            addTypedOrderedFields(fields, computed, definition.function, definition);
        }
        addTypedFields(fields, computed, definition.function, definition);
        anyFieldAdded = true;
    }
    return anyFieldAdded;
}
Also used : PropertyDefinition(org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition) StringPropertyState(org.apache.jackrabbit.oak.plugins.memory.StringPropertyState) PropertyState(org.apache.jackrabbit.oak.api.PropertyState)

Example 2 with PropertyDefinition

use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.

the class SimSearchUtils method bruteForceFVRerank.

/**
 * Re-ranks the given search results in place by feature vector distance.
 * <p>
 * The first returned document is taken as the input document. For each similarity
 * property, the binary feature vector of every result is compared with the input
 * document's vector and the result's score is replaced by {@code 1 / distance}.
 * Afterwards:
 * <ul>
 *   <li>documents whose distance is NaN or infinite are removed,</li>
 *   <li>documents without a computed distance, or whose distance is not below ten
 *       times the average distance, are removed,</li>
 *   <li>the remaining documents are sorted by descending score,</li>
 *   <li>only the top 15 documents are retained and the max score is updated.</li>
 * </ul>
 * When several properties are given, the distance sum and counter accumulate over all
 * of them, while the score and stored distance of a document are those of the last
 * property that yielded a valid distance.
 * <p>
 * If {@code docs} holds no results, the method returns without changing anything.
 *
 * @param sp            the similarity property definitions
 * @param docs          the search results; {@code scoreDocs} and the max score are modified
 * @param indexSearcher searcher used to load the stored documents
 * @throws IOException if loading a document from the searcher fails
 */
public static void bruteForceFVRerank(List<PropertyDefinition> sp, TopDocs docs, IndexSearcher indexSearcher) throws IOException {
    if (docs.scoreDocs == null || docs.scoreDocs.length == 0) {
        // no results means there is no input document to compare against
        return;
    }
    double distSum = 0d;
    double counter = 0d;
    Map<Integer, Double> distances = new HashMap<>();
    int k = 15;
    // we assume the input doc is the first one returned
    ScoreDoc inputDoc = docs.scoreDocs[0];
    List<Integer> toDiscard = new LinkedList<>();
    for (PropertyDefinition pd : sp) {
        String fieldName = FieldNames.createBinSimilarityFieldName(pd.name);
        BytesRef binaryValue = indexSearcher.doc(inputDoc.doc).getBinaryValue(fieldName);
        if (binaryValue != null) {
            double[] inputVector = toDoubleArray(binaryValue.bytes);
            for (ScoreDoc scoreDoc : docs.scoreDocs) {
                BytesRef featureVectorBinary = indexSearcher.doc(scoreDoc.doc).getBinaryValue(fieldName);
                if (featureVectorBinary != null) {
                    double[] currentVector = toDoubleArray(featureVectorBinary.bytes);
                    // constant term to avoid division by zero
                    double distance = dist(inputVector, currentVector) + 1e-10;
                    if (Double.isNaN(distance) || Double.isInfinite(distance)) {
                        toDiscard.add(scoreDoc.doc);
                    } else {
                        distSum += distance;
                        counter++;
                        distances.put(scoreDoc.doc, distance);
                        scoreDoc.score = (float) (1d / distance);
                    }
                }
            }
        }
    }
    // remove docs having invalid distance
    if (!toDiscard.isEmpty()) {
        docs.scoreDocs = Arrays.stream(docs.scoreDocs).filter(e -> !toDiscard.contains(e.doc)).toArray(ScoreDoc[]::new);
    }
    // remove docs whose distance is one order of magnitude higher than average distance;
    // when no distance was computed at all the threshold is NaN (0/0) and, as no doc has
    // an entry in 'distances', every doc is removed
    final double distanceThreshold = 10 * distSum / counter;
    docs.scoreDocs = Arrays.stream(docs.scoreDocs).filter(e -> distances.containsKey(e.doc) && distances.get(e.doc) < distanceThreshold).toArray(ScoreDoc[]::new);
    // rerank scoreDocs: highest score (smallest distance) first
    Arrays.parallelSort(docs.scoreDocs, 0, docs.scoreDocs.length, (o1, o2) -> Double.compare(o2.score, o1.score));
    // retain only the top k nearest neighbours
    if (docs.scoreDocs.length > k) {
        docs.scoreDocs = Arrays.copyOfRange(docs.scoreDocs, 0, k);
    }
    if (docs.scoreDocs.length > 0) {
        docs.setMaxScore(docs.scoreDocs[0].score);
    }
}
Also used : PropertyDefinition(org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition) ScoreDoc(org.apache.lucene.search.ScoreDoc) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with PropertyDefinition

use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.

the class LucenePropertyIndex method getLuceneRequest.

/**
 * Get the Lucene query for the given filter.
 * <p>
 * The request is assembled from the full-text constraint (if any), from a native
 * function restriction (more-like-this / similarity, spellcheck, suggest or a plain
 * Lucene query string) or otherwise from the non full-text constraints, and finally
 * from property existence queries for the sort entries when nothing else was created.
 *
 * @param plan index plan containing filter details
 * @param augmentorFactory factory from which the index augmentor for the plan is obtained
 * @param reader the Lucene reader; when {@code null} the reader-dependent native
 *               queries (more-like-this, similarity, spellcheck, suggest) are skipped
 * @return the Lucene query
 * @throws RuntimeException if a native Lucene query string cannot be parsed
 * @throws IllegalStateException if a reader is available but no query could be created
 */
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
    FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
    List<Query> qs = new ArrayList<>();
    Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    PlanResult planResult = getPlanResult(plan);
    LuceneIndexDefinition defn = (LuceneIndexDefinition) planResult.indexDefinition;
    Analyzer analyzer = defn.getAnalyzer();
    // there might be no full-text constraint
    // when using the LowCostLuceneIndexProvider
    // which is used for testing
    if (ft != null) {
        qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
    }
    // Check if native function is supported
    PropertyRestriction pr = null;
    if (defn.hasFunctionDefined()) {
        pr = filter.getPropertyRestriction(defn.getFunctionName());
    }
    if (pr != null) {
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        // Only the leading marker is stripped: the remainder of the query is user
        // supplied text and may legitimately contain the marker again.
        if (query.startsWith("mlt?")) {
            String mltQueryString = query.substring("mlt?".length());
            if (reader != null) {
                List<PropertyDefinition> sp = new LinkedList<>();
                for (IndexingRule r : defn.getDefinedRules()) {
                    sp.addAll(r.getSimilarityProperties());
                }
                if (sp.isEmpty()) {
                    // no similarity properties configured: plain more-like-this query
                    Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                    if (moreLikeThis != null) {
                        qs.add(moreLikeThis);
                    }
                } else {
                    Query similarityQuery = SimSearchUtils.getSimilarityQuery(sp, reader, mltQueryString);
                    if (similarityQuery != null) {
                        qs.add(similarityQuery);
                    }
                }
            }
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.substring("spellcheck?".length());
            if (reader != null) {
                return new LuceneRequestFacade<>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.substring("suggest?".length());
            if (reader != null) {
                return new LuceneRequestFacade<>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            // anything else is handed to the classic Lucene query parser as is
            QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (planResult.evaluateNonFullTextConstraints()) {
        addNonFullTextConstraints(qs, plan, reader);
    }
    if (qs.isEmpty() && plan.getSortOrder() != null) {
        // This case indicates that query just had order by and no
        // property restriction defined. In this case property
        // existence queries for each sort entry
        List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
        for (int i = 0; i < orders.size(); i++) {
            OrderEntry oe = orders.get(i);
            PropertyDefinition pd = planResult.getOrderedProperty(i);
            PropertyRestriction orderRest = new PropertyRestriction();
            orderRest.propertyName = oe.getPropertyName();
            Query q = createQuery(oe.getPropertyName(), orderRest, pd);
            if (q != null) {
                qs.add(q);
            }
        }
    }
    if (qs.isEmpty()) {
        if (reader == null) {
            // Without a reader the reader-dependent native queries above are skipped,
            // so no query may have been created; just return match all queries
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        // NOTE(review): the original comment here is truncated; presumably for purely
        // node type based queries all the documents have to
        // be returned (if the index definition has a single rule)
        if (planResult.evaluateNodeTypeRestriction()) {
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        throw new IllegalStateException("No query created for filter " + filter);
    }
    return performAdditionalWraps(qs);
}
Also used : PlanResult(org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PlanResult) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) NumericRangeQuery(org.apache.lucene.search.NumericRangeQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) ArrayList(java.util.ArrayList) FulltextQueryTermsProvider(org.apache.jackrabbit.oak.plugins.index.lucene.spi.FulltextQueryTermsProvider) Analyzer(org.apache.lucene.analysis.Analyzer) IndexingRule(org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule) PropertyRestriction(org.apache.jackrabbit.oak.spi.query.Filter.PropertyRestriction) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) PropertyDefinition(org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition) LinkedList(java.util.LinkedList) StandardQueryParser(org.apache.lucene.queryparser.flexible.standard.StandardQueryParser) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) Filter(org.apache.jackrabbit.oak.spi.query.Filter) FullTextExpression(org.apache.jackrabbit.oak.spi.query.fulltext.FullTextExpression) ParseException(org.apache.lucene.queryparser.classic.ParseException)

Example 4 with PropertyDefinition

use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.

the class LuceneIndex method isExcludedProperty.

/**
 * Decides whether the given property restriction must be left out of the query.
 * <p>
 * A restriction is excluded when its property name contains a {@code /}, when the
 * rule has no configuration for the property or the property is not indexed, or when
 * the type of the restriction value is not included by the rule. The type is taken
 * from the first available value among {@code pr.first}, {@code pr.last} and the
 * first element of {@code pr.list}; without any value no type check is made.
 *
 * @param pr   the property restriction to check
 * @param rule the indexing rule supplying the property configuration
 * @return {@code true} if the restriction is excluded, {@code false} otherwise
 */
private static boolean isExcludedProperty(PropertyRestriction pr, IndexingRule rule) {
    String propertyName = pr.propertyName;
    // lucene cannot handle child-level property restrictions
    if (propertyName.contains("/")) {
        return true;
    }
    // the property must be configured in the rule and flagged as indexed
    PropertyDefinition definition = rule.getConfig(propertyName);
    if (definition == null || !definition.index) {
        return true;
    }
    // pick the type tag from whichever restriction value is available
    Integer typeTag;
    if (pr.first != null) {
        typeTag = pr.first.getType().tag();
    } else if (pr.last != null) {
        typeTag = pr.last.getType().tag();
    } else if (pr.list != null && !pr.list.isEmpty()) {
        typeTag = pr.list.get(0).getType().tag();
    } else {
        typeTag = null;
    }
    // excluded only when a type is known and the rule does not include it
    return typeTag != null && !includePropertyType(typeTag, rule);
}
Also used : PropertyDefinition(org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition)

Example 5 with PropertyDefinition

use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.

the class LuceneIndexDefinitionTest method indexRuleWithPropertyRegEx.

/**
 * Checks an indexing rule that mixes a plainly named property with a property whose
 * name is a regular expression: the regex node name itself is not indexed, properties
 * matching the pattern are, and they get the boost configured on the regex definition.
 */
@Test
public void indexRuleWithPropertyRegEx() throws Exception {
    NodeBuilder ruleRoot = builder.child(INDEX_RULES);
    ruleRoot.child("nt:folder");
    // prop1: plain property definition with a boost
    TestUtil.child(ruleRoot, "nt:folder/properties/prop1")
            .setProperty(FulltextIndexConstants.FIELD_BOOST, 3.0);
    // prop2: regex based definition applying to names matching "foo.*"
    TestUtil.child(ruleRoot, "nt:folder/properties/prop2")
            .setProperty(PROP_NAME, "foo.*")
            .setProperty(FulltextIndexConstants.PROP_IS_REGEX, true)
            .setProperty(FulltextIndexConstants.FIELD_BOOST, 4.0);

    LuceneIndexDefinition definition = new LuceneIndexDefinition(root, builder.getNodeState(), "/foo");
    IndexingRule folderRule = definition.getApplicableIndexingRule(asState(newNode("nt:folder")));
    assertNotNull(folderRule);

    assertTrue(folderRule.isIndexed("prop1"));
    // "prop2" is only the name of the definition node, not of an indexed property
    assertFalse(folderRule.isIndexed("prop2"));
    assertTrue(folderRule.isIndexed("fooProp"));

    // a property matching the pattern gets the boost of the regex definition
    PropertyDefinition fooConfig = folderRule.getConfig("fooProp2");
    assertEquals(4.0f, fooConfig.boost, 0);
}
Also used : IndexingRule(org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule) NodeBuilder(org.apache.jackrabbit.oak.spi.state.NodeBuilder) PropertyDefinition(org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition) LuceneIndexHelper.newLuceneIndexDefinition(org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneIndexHelper.newLuceneIndexDefinition) Test(org.junit.Test)

Aggregations

PropertyDefinition (org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition) 35, IndexDefinition (org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition) 11, HashMap (java.util.HashMap) 10, IndexingRule (org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule) 10, PlanResult (org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner.PlanResult) 10, WildcardQuery (org.apache.lucene.search.WildcardQuery) 10, Map (java.util.Map) 9, PropertyState (org.apache.jackrabbit.oak.api.PropertyState) 9, Filter (org.apache.jackrabbit.oak.spi.query.Filter) 9, IOException (java.io.IOException) 8, ArrayList (java.util.ArrayList) 8, QueryStringQuery (co.elastic.clients.elasticsearch._types.query_dsl.QueryStringQuery) 7, List (java.util.List) 7, FieldValue (co.elastic.clients.elasticsearch._types.FieldValue) 6, SortOptions (co.elastic.clients.elasticsearch._types.SortOptions) 6, SortOrder (co.elastic.clients.elasticsearch._types.SortOrder) 6, SuggestMode (co.elastic.clients.elasticsearch._types.SuggestMode) 6, Aggregation (co.elastic.clients.elasticsearch._types.aggregations.Aggregation) 6, BoolQuery (co.elastic.clients.elasticsearch._types.query_dsl.BoolQuery) 6, ChildScoreMode (co.elastic.clients.elasticsearch._types.query_dsl.ChildScoreMode) 6