use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.
the class FulltextDocumentMaker method indexFunctionRestrictions.
/**
 * Adds index fields for every function restriction of the current indexing rule.
 *
 * <p>For each property definition returned by
 * {@code indexingRule.getFunctionRestrictions()} the function value is obtained via
 * {@code calculateValue}. Definitions for which no value is produced are skipped.
 *
 * @param path   path handed to {@code calculateValue}
 * @param fields document the typed (and, if requested, ordered) fields are added to
 * @param state  node state handed to {@code calculateValue}
 * @return {@code true} if a value was produced for at least one function restriction
 */
private boolean indexFunctionRestrictions(String path, D fields, NodeState state) {
    boolean anyValueIndexed = false;
    for (PropertyDefinition definition : indexingRule.getFunctionRestrictions()) {
        PropertyState computed = calculateValue(path, state, definition.functionCode);
        if (computed == null) {
            // no value for this function on the given state: nothing to index
            continue;
        }
        // ordered fields are added first, and only when the definition asks for them
        if (definition.ordered) {
            addTypedOrderedFields(fields, computed, definition.function, definition);
        }
        addTypedFields(fields, computed, definition.function, definition);
        anyValueIndexed = true;
    }
    return anyValueIndexed;
}
use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.
the class SimSearchUtils method bruteForceFVRerank.
/**
 * Re-ranks {@code docs} in place by the distance between the binary feature vector of
 * the first hit (assumed to be the input document) and the feature vector of each hit.
 *
 * <p>For every similarity property the score of each hit carrying a feature vector is
 * replaced by {@code 1 / distance}. Afterwards hits with an invalid (NaN / infinite)
 * distance, hits without any computed distance and hits whose distance is at least ten
 * times the average distance are dropped, the remaining hits are sorted by descending
 * score and at most the 15 best ones are retained.
 *
 * <p>If {@code docs} contains no hits the method returns without touching it.
 *
 * @param sp            similarity property definitions whose feature vector fields are compared
 * @param docs          search results to rerank; {@code scoreDocs} and the max score are updated
 * @param indexSearcher searcher used to load the stored feature vectors
 * @throws IOException if a document cannot be loaded through the searcher
 */
public static void bruteForceFVRerank(List<PropertyDefinition> sp, TopDocs docs, IndexSearcher indexSearcher) throws IOException {
    // the "first hit is the input doc" assumption below needs at least one hit
    if (docs.scoreDocs == null || docs.scoreDocs.length == 0) {
        return;
    }
    double distSum = 0d;
    double counter = 0d;
    Map<Integer, Double> distances = new HashMap<>();
    final int k = 15;
    // we assume the input doc is the first one returned
    ScoreDoc inputDoc = docs.scoreDocs[0];
    List<Integer> toDiscard = new LinkedList<>();
    for (PropertyDefinition pd : sp) {
        String fieldName = FieldNames.createBinSimilarityFieldName(pd.name);
        BytesRef binaryValue = indexSearcher.doc(inputDoc.doc).getBinaryValue(fieldName);
        if (binaryValue == null) {
            // the input doc has no feature vector for this property
            continue;
        }
        // only [offset, offset + length) of the backing array holds valid data
        double[] inputVector = toDoubleArray(Arrays.copyOfRange(binaryValue.bytes,
                binaryValue.offset, binaryValue.offset + binaryValue.length));
        for (ScoreDoc scoreDoc : docs.scoreDocs) {
            BytesRef featureVectorBinary = indexSearcher.doc(scoreDoc.doc).getBinaryValue(fieldName);
            if (featureVectorBinary == null) {
                continue;
            }
            double[] currentVector = toDoubleArray(Arrays.copyOfRange(featureVectorBinary.bytes,
                    featureVectorBinary.offset, featureVectorBinary.offset + featureVectorBinary.length));
            // constant term to avoid division by zero
            double distance = dist(inputVector, currentVector) + 1e-10;
            if (Double.isNaN(distance) || Double.isInfinite(distance)) {
                toDiscard.add(scoreDoc.doc);
            } else {
                distSum += distance;
                counter++;
                distances.put(scoreDoc.doc, distance);
                scoreDoc.score = (float) (1d / distance);
            }
        }
    }
    // remove docs having invalid distance
    if (!toDiscard.isEmpty()) {
        docs.scoreDocs = Arrays.stream(docs.scoreDocs)
                .filter(e -> !toDiscard.contains(e.doc))
                .toArray(ScoreDoc[]::new);
    }
    // remove docs whose distance is one order of magnitude higher than average distance;
    // when no distance was computed at all, 'distances' is empty and every doc is dropped
    final double distanceThreshold = 10 * distSum / counter;
    docs.scoreDocs = Arrays.stream(docs.scoreDocs)
            .filter(e -> distances.containsKey(e.doc) && distances.get(e.doc) < distanceThreshold)
            .toArray(ScoreDoc[]::new);
    // rerank scoreDocs: highest score (smallest distance) first
    Arrays.parallelSort(docs.scoreDocs, 0, docs.scoreDocs.length,
            (o1, o2) -> Double.compare(o2.score, o1.score));
    // retain only the top k nearest neighbours
    if (docs.scoreDocs.length > k) {
        docs.scoreDocs = Arrays.copyOfRange(docs.scoreDocs, 0, k);
    }
    if (docs.scoreDocs.length > 0) {
        docs.setMaxScore(docs.scoreDocs[0].score);
    }
}
use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.
the class LucenePropertyIndex method getLuceneRequest.
/**
 * Get the Lucene query for the given filter.
 *
 * <p>The request is assembled from, in order: the full-text constraint (if any), the
 * native function restriction (if the index defines one and the filter uses it) or
 * otherwise the non-full-text constraints, and finally property queries derived from
 * the sort order when nothing else produced a query. Native queries starting with
 * {@code spellcheck?} or {@code suggest?} short-circuit and are returned directly when
 * a reader is available.
 *
 * @param plan index plan containing filter details
 * @param augmentorFactory factory handed to {@code getIndexAgumentor} to obtain the
 *                         full-text query terms provider for the plan
 * @param reader the Lucene reader, may be {@code null}
 * @return the Lucene query
 * @throws IllegalStateException if no query could be created for the filter
 * @throws RuntimeException if a native query string cannot be parsed (wraps the
 *                          {@code ParseException})
 */
private static LuceneRequestFacade getLuceneRequest(IndexPlan plan, IndexAugmentorFactory augmentorFactory, IndexReader reader) {
    FulltextQueryTermsProvider augmentor = getIndexAgumentor(plan, augmentorFactory);
    List<Query> qs = new ArrayList<>();
    Filter filter = plan.getFilter();
    FullTextExpression ft = filter.getFullTextConstraint();
    PlanResult planResult = getPlanResult(plan);
    LuceneIndexDefinition defn = (LuceneIndexDefinition) planResult.indexDefinition;
    Analyzer analyzer = defn.getAnalyzer();
    // there might be no full-text constraint when using the
    // LowCostLuceneIndexProvider which is used for testing
    if (ft != null) {
        qs.add(getFullTextQuery(plan, ft, analyzer, augmentor));
    }
    // Check if native function is supported
    PropertyRestriction pr = null;
    if (defn.hasFunctionDefined()) {
        pr = filter.getPropertyRestriction(defn.getFunctionName());
    }
    if (pr != null) {
        String query = String.valueOf(pr.first.getValue(pr.first.getType()));
        // Only the leading marker is stripped; a later occurrence of the same text
        // belongs to the payload and must be preserved.
        if (query.startsWith("mlt?")) {
            String mltQueryString = query.substring("mlt?".length());
            if (reader != null) {
                List<PropertyDefinition> sp = new LinkedList<>();
                for (IndexingRule r : defn.getDefinedRules()) {
                    sp.addAll(r.getSimilarityProperties());
                }
                if (sp.isEmpty()) {
                    // no similarity properties configured: use the more-like-this helper
                    Query moreLikeThis = MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
                    if (moreLikeThis != null) {
                        qs.add(moreLikeThis);
                    }
                } else {
                    Query similarityQuery = SimSearchUtils.getSimilarityQuery(sp, reader, mltQueryString);
                    if (similarityQuery != null) {
                        qs.add(similarityQuery);
                    }
                }
            }
        } else if (query.startsWith("spellcheck?")) {
            String spellcheckQueryString = query.substring("spellcheck?".length());
            if (reader != null) {
                return new LuceneRequestFacade<>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString, reader));
            }
        } else if (query.startsWith("suggest?")) {
            String suggestQueryString = query.substring("suggest?".length());
            if (reader != null) {
                return new LuceneRequestFacade<>(SuggestHelper.getSuggestQuery(suggestQueryString));
            }
        } else {
            // plain native query: the parser is only needed on this path
            QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
            try {
                qs.add(queryParser.parse(query));
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    } else if (planResult.evaluateNonFullTextConstraints()) {
        addNonFullTextConstraints(qs, plan, reader);
    }
    if (qs.isEmpty() && plan.getSortOrder() != null) {
        // This case indicates that query just had order by and no
        // property restriction defined. In this case property
        // existence queries for each sort entry
        List<OrderEntry> orders = removeNativeSort(plan.getSortOrder());
        for (int i = 0; i < orders.size(); i++) {
            OrderEntry oe = orders.get(i);
            PropertyDefinition pd = planResult.getOrderedProperty(i);
            PropertyRestriction orderRest = new PropertyRestriction();
            orderRest.propertyName = oe.getPropertyName();
            Query q = createQuery(oe.getPropertyName(), orderRest, pd);
            if (q != null) {
                qs.add(q);
            }
        }
    }
    if (qs.isEmpty()) {
        if (reader == null) {
            // just return match all queries
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        // for queries restricted by node type only, all the documents would
        // be returned (if the index definition has a single rule)
        if (planResult.evaluateNodeTypeRestriction()) {
            return new LuceneRequestFacade<Query>(new MatchAllDocsQuery());
        }
        throw new IllegalStateException("No query created for filter " + filter);
    }
    return performAdditionalWraps(qs);
}
use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.
the class LuceneIndex method isExcludedProperty.
/**
 * Decides whether the given property restriction has to be excluded for the given rule.
 *
 * <p>A restriction is excluded when its property name addresses a child node (contains
 * {@code '/'}), when the rule has no indexed configuration for the property, or when
 * the type of the restriction's value is rejected by {@code includePropertyType}.
 *
 * @param pr   the property restriction to check
 * @param rule the indexing rule supplying the property configuration
 * @return {@code true} if the restriction must be excluded, {@code false} otherwise
 */
private static boolean isExcludedProperty(PropertyRestriction pr, IndexingRule rule) {
    String propertyName = pr.propertyName;
    // lucene cannot handle child-level property restrictions
    if (propertyName.contains("/")) {
        return true;
    }

    // the property must be known to the rule and flagged as indexed
    PropertyDefinition definition = rule.getConfig(propertyName);
    if (definition == null || !definition.index) {
        return true;
    }

    // take the type tag from the first available bound: first, then last, then list head
    Integer typeTag = null;
    if (pr.first != null) {
        typeTag = pr.first.getType().tag();
    } else if (pr.last != null) {
        typeTag = pr.last.getType().tag();
    } else if (pr.list != null && !pr.list.isEmpty()) {
        typeTag = pr.list.get(0).getType().tag();
    }

    // without any value there is no type to check, so the restriction is kept
    return typeTag != null && !includePropertyType(typeTag, rule);
}
use of org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition in project jackrabbit-oak by apache.
the class LuceneIndexDefinitionTest method indexRuleWithPropertyRegEx.
/**
 * Checks that a property definition whose name is flagged as a regular expression
 * applies to the property names it matches rather than to its own node name.
 */
@Test
public void indexRuleWithPropertyRegEx() throws Exception {
    NodeBuilder indexRules = builder.child(INDEX_RULES);
    indexRules.child("nt:folder");

    // prop1: plain definition carrying only a boost
    TestUtil.child(indexRules, "nt:folder/properties/prop1")
            .setProperty(FulltextIndexConstants.FIELD_BOOST, 3.0);

    // prop2: definition whose name is the regular expression "foo.*"
    TestUtil.child(indexRules, "nt:folder/properties/prop2")
            .setProperty(PROP_NAME, "foo.*")
            .setProperty(FulltextIndexConstants.PROP_IS_REGEX, true)
            .setProperty(FulltextIndexConstants.FIELD_BOOST, 4.0);

    LuceneIndexDefinition definition = new LuceneIndexDefinition(root, builder.getNodeState(), "/foo");
    IndexingRule folderRule = definition.getApplicableIndexingRule(asState(newNode("nt:folder")));
    assertNotNull(folderRule);

    assertTrue(folderRule.isIndexed("prop1"));
    // the node name "prop2" itself is not indexed, only names matching the regex are
    assertFalse(folderRule.isIndexed("prop2"));
    assertTrue(folderRule.isIndexed("fooProp"));

    // a name matching the regex picks up the boost configured on the regex definition
    PropertyDefinition matched = folderRule.getConfig("fooProp2");
    assertEquals(4.0f, matched.boost, 0);
}
Aggregations