Search in sources :

Example 1 with IndexableFeatureConjunction

use of com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction in project vespa by vespa-engine.

the class PredicateTreeAnnotator method assignIntervalLabels.

/**
 * Walks the predicate tree depth-first and registers an interval in {@code context} for every leaf node:
 * {@link com.yahoo.document.predicate.FeatureSet}, {@link com.yahoo.document.predicate.FeatureRange} and
 * {@link com.yahoo.document.predicate.FeatureConjunction}.
 *
 * <p>Every leaf visited increments {@code context.leftNodeLeaves}. The conjunction branch reads that counter
 * before descending into its first operand, so the traversal order is significant.
 *
 * @param predicate the (sub)tree to label
 * @param begin     lower boundary of the interval range handed to this subtree
 * @param end       upper boundary of the interval range handed to this subtree
 * @param isNegated negation state inherited from the ancestors; flipped each time a {@code Negation} is passed
 * @param context   receives the registered intervals and carries the running leaf counter
 * @throws UnsupportedOperationException if the predicate type is not one of those handled below
 */
private static void assignIntervalLabels(Predicate predicate, int begin, int end, boolean isNegated, AnnotatorContext context) {
    // NOTE(review): AND and OR nodes are dispatched purely on their type; isNegated is only forwarded and never
    // swaps the two. Presumably negations have already been pushed down to the leaves - confirm with the caller.
    if (predicate instanceof Conjunction) {
        List<Predicate> operands = ((Conjunction) predicate).getOperands();
        int lastIndex = operands.size() - 1;
        int cursor = begin;
        for (int index = 0; index <= lastIndex; index++) {
            Predicate operand = operands.get(index);
            int operandSize = context.subTreeSizes.get(operand);
            if (index == lastIndex) {
                // The last (possibly only) operand takes everything that is left, up to and including end.
                assignIntervalLabels(operand, cursor, end, isNegated, context);
                continue;
            }
            // The first operand's upper boundary is derived from the number of leaves labelled so far;
            // each later operand continues from where the previous one stopped.
            int nextStart = (index == 0)
                    ? context.leftNodeLeaves + operandSize + 1
                    : cursor + operandSize;
            assignIntervalLabels(operand, cursor, nextStart - 1, isNegated, context);
            cursor = nextStart;
        }
        return;
    }

    if (predicate instanceof FeatureConjunction) {
        // A FeatureConjunction is registered as if it were a FeatureSet with a single child.
        // Note: it should never be negated, as AndOrSimplifier will push negations down to the
        // leafs (FeatureSets); the negated case is nevertheless handled like for the other leaves.
        int intervalEnd = end;
        if (isNegated) {
            intervalEnd = calculateZStarIntervalEnd(end, context);
        }
        IndexableFeatureConjunction key = new IndexableFeatureConjunction((FeatureConjunction) predicate);
        int interval = Interval.fromBoundaries(begin, intervalEnd);
        context.featureConjunctions.computeIfAbsent(key, unused -> new ArrayList<>()).add(interval);
        if (isNegated) {
            registerZStarInterval(begin, end, intervalEnd, context);
        }
        context.leftNodeLeaves++;
        return;
    }

    if (predicate instanceof Disjunction) {
        // Every operand of an OR node inherits the boundaries of the OR node itself.
        for (Predicate operand : ((Disjunction) predicate).getOperands()) {
            assignIntervalLabels(operand, begin, end, isNegated, context);
        }
        return;
    }

    if (predicate instanceof FeatureSet) {
        FeatureSet set = (FeatureSet) predicate;
        int intervalEnd = end;
        if (isNegated) {
            intervalEnd = calculateZStarIntervalEnd(end, context);
        }
        // One interval registration per (key, value) pair of the set.
        for (String value : set.getValues()) {
            long hash = Feature.createHash(set.getKey(), value);
            int interval = Interval.fromBoundaries(begin, intervalEnd);
            registerFeatureInterval(hash, interval, context.intervals);
        }
        if (isNegated) {
            registerZStarInterval(begin, end, intervalEnd, context);
        }
        context.leftNodeLeaves++;
        return;
    }

    if (predicate instanceof Negation) {
        // Negation occupies no interval of its own; it only flips the flag for its operand.
        assignIntervalLabels(((Negation) predicate).getOperand(), begin, end, !isNegated, context);
        return;
    }

    if (predicate instanceof FeatureRange) {
        FeatureRange range = (FeatureRange) predicate;
        int intervalEnd = end;
        if (isNegated) {
            intervalEnd = calculateZStarIntervalEnd(end, context);
        }
        int interval = Interval.fromBoundaries(begin, intervalEnd);
        // Plain partitions share the same interval, keyed by the hash of the partition label.
        for (RangePartition partition : range.getPartitions()) {
            long hash = PredicateHash.hash64(partition.getLabel());
            registerFeatureInterval(hash, interval, context.intervals);
        }
        // Edge partitions additionally carry their encoded bounds and go into a separate map.
        for (RangeEdgePartition edge : range.getEdgePartitions()) {
            long hash = PredicateHash.hash64(edge.getLabel());
            IntervalWithBounds bounded = new IntervalWithBounds(interval, (int) edge.encodeBounds());
            registerFeatureInterval(hash, bounded, context.intervalsWithBounds);
        }
        if (isNegated) {
            registerZStarInterval(begin, end, intervalEnd, context);
        }
        context.leftNodeLeaves++;
        return;
    }

    throw new UnsupportedOperationException("Cannot handle predicate of type " + predicate.getClass().getSimpleName());
}
Also used : RangePartition(com.yahoo.document.predicate.RangePartition) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Negation(com.yahoo.document.predicate.Negation) Predicate(com.yahoo.document.predicate.Predicate) Disjunction(com.yahoo.document.predicate.Disjunction) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Conjunction(com.yahoo.document.predicate.Conjunction) FeatureRange(com.yahoo.document.predicate.FeatureRange) FeatureSet(com.yahoo.document.predicate.FeatureSet) RangeEdgePartition(com.yahoo.document.predicate.RangeEdgePartition) IntervalWithBounds(com.yahoo.search.predicate.index.IntervalWithBounds)

Example 2 with IndexableFeatureConjunction

use of com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction in project vespa by vespa-engine.

the class PredicateTreeAnnotatorTest method require_that_featureconjunctions_are_registered_and_given_an_interval.

@Test
public void require_that_featureconjunctions_are_registered_and_given_an_interval() {
    // Tree under test: (range(key) OR conj(NOT keyA in {C}, keyB in {D})) AND foo in {bar}
    Predicate predicate =
            and(
                    or(
                            range("key",
                                    partition("key=10-19"),
                                    partition("key=20-29"),
                                    edgePartition("key=0", 5, 10, 20),
                                    edgePartition("key=30", 0, 0, 3)),
                            conj(
                                    not(feature("keyA").inSet("C")),
                                    feature("keyB").inSet("D"))),
                    feature("foo").inSet("bar"));

    PredicateTreeAnnotations annotations = PredicateTreeAnnotator.createPredicateTreeAnnotations(predicate);

    assertEquals(2, annotations.minFeature);
    assertEquals(3, annotations.intervalEnd);
    assertEquals(3, annotations.intervalMap.size());
    assertEquals(2, annotations.boundsMap.size());

    // Exactly one feature conjunction is registered, and it owns exactly one interval.
    assertEquals(1, annotations.featureConjunctions.size());
    List<Integer> conjunctionIntervals = annotations.featureConjunctions.values().iterator().next();
    assertEquals(1, conjunctionIntervals.size());
    // Expected encoded interval: bit 16 set, low bits equal to 2.
    assertEquals(0b1_0000000000000010, conjunctionIntervals.get(0).longValue());
}
Also used : IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) List(java.util.List) Map(java.util.Map) Predicate(com.yahoo.document.predicate.Predicate) Test(org.junit.Test)

Example 3 with IndexableFeatureConjunction

use of com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction in project vespa by vespa-engine.

the class PredicateIndexBuilder method indexDocumentConjunctions.

/**
 * Adds all feature conjunctions of one document to the conjunction indexes.
 *
 * <p>For each conjunction, its interval list is inserted into the interval store, the value returned by that
 * insert is wrapped with the document id in a {@code Posting} and stored under the conjunction's id, and the
 * conjunction itself is handed to the conjunction index builder.
 *
 * @param docId               id of the document being indexed
 * @param featureConjunctions the document's feature conjunctions mapped to their intervals
 */
private void indexDocumentConjunctions(int docId, Map<IndexableFeatureConjunction, List<Integer>> featureConjunctions) {
    featureConjunctions.forEach((conjunction, intervals) -> {
        Posting posting = new Posting(docId, intervalStoreBuilder.insert(intervals));
        conjunctionIntervalIndexBuilder.insert(conjunction.id, posting);
        conjunctionIndexBuilder.indexConjunction(conjunction);
    });
}
Also used : IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Posting(com.yahoo.search.predicate.index.Posting) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 4 with IndexableFeatureConjunction

use of com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction in project vespa by vespa-engine.

the class PredicateTreeAnalyzer method aggregatePredicateStatistics.

// Pass one of the analysis: a depth-first walk over the predicate tree.
// It stores the subtree size of every conjunction operand in context.subTreeSizes and counts how often each
// feature / feature conjunction occurs (those counts are consumed by the min-feature calculation in pass two).
// A negated leaf counts as size 2 and sets context.hasNegationPredicate; any other leaf counts as size 1.
// Returns the size of the subtree rooted at the given predicate.
private static int aggregatePredicateStatistics(Predicate predicate, boolean isNegated, AnalyzerContext context) {
    if (predicate instanceof Negation) {
        // A negation has no size of its own; it only flips the flag for its operand.
        Predicate operand = ((Negation) predicate).getOperand();
        return aggregatePredicateStatistics(operand, !isNegated, context);
    }

    if (predicate instanceof Conjunction) {
        // Size of an AND node is the sum of its operands; each operand's size is remembered for later use.
        int total = 0;
        for (Predicate operand : ((Conjunction) predicate).getOperands()) {
            int operandSize = aggregatePredicateStatistics(operand, isNegated, context);
            context.subTreeSizes.put(operand, operandSize);
            total += operandSize;
        }
        return total;
    }

    if (predicate instanceof FeatureConjunction) {
        if (!isNegated) {
            // Identical feature conjunctions are counted together, keyed by the IndexableFeatureConjunction id.
            IndexableFeatureConjunction indexable = new IndexableFeatureConjunction((FeatureConjunction) predicate);
            incrementOccurrence(context.conjunctionOccurrences, indexable.id);
            // The interval algorithm treats it as a leaf, hence a single child.
            return 1;
        }
        context.hasNegationPredicate = true;
        return 2;
    }

    if (predicate instanceof Disjunction) {
        // Size of an OR node is the sum of its operands; no per-operand size is recorded here.
        int total = 0;
        for (Predicate operand : ((Disjunction) predicate).getOperands()) {
            total += aggregatePredicateStatistics(operand, isNegated, context);
        }
        return total;
    }

    if (predicate instanceof FeatureSet) {
        if (!isNegated) {
            FeatureSet set = (FeatureSet) predicate;
            for (String value : set.getValues()) {
                incrementOccurrence(context.featureOccurrences, Feature.createHash(set.getKey(), value));
            }
            return 1;
        }
        context.hasNegationPredicate = true;
        return 2;
    }

    if (predicate instanceof FeatureRange) {
        if (!isNegated) {
            // Ranges are counted once per range, keyed by the hash of the range key.
            incrementOccurrence(context.featureOccurrences, PredicateHash.hash64(((FeatureRange) predicate).getKey()));
            return 1;
        }
        context.hasNegationPredicate = true;
        return 2;
    }

    throw new UnsupportedOperationException("Cannot handle predicate of type " + predicate.getClass().getSimpleName());
}
Also used : Disjunction(com.yahoo.document.predicate.Disjunction) Negation(com.yahoo.document.predicate.Negation) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Conjunction(com.yahoo.document.predicate.Conjunction) FeatureRange(com.yahoo.document.predicate.FeatureRange) FeatureSet(com.yahoo.document.predicate.FeatureSet)

Example 5 with IndexableFeatureConjunction

use of com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction in project vespa by vespa-engine.

the class PredicateTreeAnalyzer method findMinFeature.

// Pass two of the analysis: a depth-first walk that computes the min-feature value of the tree.
// AND nodes sum their operands, OR nodes take the minimum, negated leaves contribute 0.0 and every other
// leaf contributes 1.0 divided by the occurrence count recorded in the context.
private static double findMinFeature(Predicate predicate, boolean isNegated, AnalyzerContext context) {
    if (predicate instanceof Conjunction) {
        // Sum over all operands.
        return ((Conjunction) predicate).getOperands().stream()
                .mapToDouble(operand -> findMinFeature(operand, isNegated, context))
                .sum();
    }

    if (predicate instanceof FeatureConjunction) {
        if (isNegated) {
            return 0.0;
        }
        // Handled as a leaf node by the interval algorithm; occurrences are keyed by the indexable id.
        IndexableFeatureConjunction indexable = new IndexableFeatureConjunction((FeatureConjunction) predicate);
        return 1.0 / context.conjunctionOccurrences.get(indexable.id);
    }

    if (predicate instanceof Disjunction) {
        // Smallest operand value; getAsDouble() throws if the disjunction has no operands.
        return ((Disjunction) predicate).getOperands().stream()
                .mapToDouble(operand -> findMinFeature(operand, isNegated, context))
                .min()
                .getAsDouble();
    }

    if (predicate instanceof Negation) {
        return findMinFeature(((Negation) predicate).getOperand(), !isNegated, context);
    }

    if (predicate instanceof FeatureSet) {
        if (isNegated) {
            return 0.0;
        }
        FeatureSet set = (FeatureSet) predicate;
        double smallest = 1.0;
        for (String value : set.getValues()) {
            long hash = Feature.createHash(set.getKey(), value);
            // Dividing by the occurrence count accounts for the same feature being used in several
            // places of the predicate tree.
            smallest = Math.min(smallest, 1.0 / context.featureOccurrences.get(hash));
        }
        return smallest;
    }

    if (predicate instanceof FeatureRange) {
        return isNegated
                ? 0.0
                : 1.0 / context.featureOccurrences.get(PredicateHash.hash64(((FeatureRange) predicate).getKey()));
    }

    throw new UnsupportedOperationException("Cannot handle predicate of type " + predicate.getClass().getSimpleName());
}
Also used : FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) FeatureRange(com.yahoo.document.predicate.FeatureRange) FeatureSet(com.yahoo.document.predicate.FeatureSet) Map(java.util.Map) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Negation(com.yahoo.document.predicate.Negation) PredicateHash(com.yahoo.document.predicate.PredicateHash) HashMap(java.util.HashMap) Feature(com.yahoo.search.predicate.index.Feature) Conjunction(com.yahoo.document.predicate.Conjunction) Disjunction(com.yahoo.document.predicate.Disjunction) Predicate(com.yahoo.document.predicate.Predicate) Disjunction(com.yahoo.document.predicate.Disjunction) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Negation(com.yahoo.document.predicate.Negation) FeatureConjunction(com.yahoo.document.predicate.FeatureConjunction) IndexableFeatureConjunction(com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction) Conjunction(com.yahoo.document.predicate.Conjunction) FeatureRange(com.yahoo.document.predicate.FeatureRange) FeatureSet(com.yahoo.document.predicate.FeatureSet)

Aggregations

IndexableFeatureConjunction (com.yahoo.search.predicate.index.conjunction.IndexableFeatureConjunction): 5; Conjunction (com.yahoo.document.predicate.Conjunction): 3; Disjunction (com.yahoo.document.predicate.Disjunction): 3; FeatureConjunction (com.yahoo.document.predicate.FeatureConjunction): 3; FeatureRange (com.yahoo.document.predicate.FeatureRange): 3; FeatureSet (com.yahoo.document.predicate.FeatureSet): 3; Negation (com.yahoo.document.predicate.Negation): 3; Predicate (com.yahoo.document.predicate.Predicate): 3; Map (java.util.Map): 3; List (java.util.List): 2; PredicateHash (com.yahoo.document.predicate.PredicateHash): 1; RangeEdgePartition (com.yahoo.document.predicate.RangeEdgePartition): 1; RangePartition (com.yahoo.document.predicate.RangePartition): 1; Feature (com.yahoo.search.predicate.index.Feature): 1; IntervalWithBounds (com.yahoo.search.predicate.index.IntervalWithBounds): 1; Posting (com.yahoo.search.predicate.index.Posting): 1; ArrayList (java.util.ArrayList): 1; HashMap (java.util.HashMap): 1; TreeMap (java.util.TreeMap): 1; Collectors.toList (java.util.stream.Collectors.toList): 1