Search in sources :

Example 1 with Match

use of org.exist.dom.persistent.Match in project exist by eXist-db.

the class LuceneMatchListener method reset.

/**
 * Re-initialises this listener's state for the given node.
 *
 * Stores the broker and the node's match chain, clears the next listener in
 * the chain, selects the Lucene configuration, collects the query terms and
 * finally scans every match whose node is an ancestor of {@code proxy}.
 *
 * @param broker the broker to store and to use for looking up the collection's index configuration
 * @param proxy  the node whose matches are to be processed
 */
protected void reset(final DBBroker broker, final NodeProxy proxy) {
    this.broker = broker;
    this.match = proxy.getMatches();
    setNextInChain(null);

    // Use the collection's custom Lucene spec when an index configuration exists,
    // otherwise fall back to the default configuration.
    final IndexSpec indexConf = proxy.getOwnerDocument().getCollection().getIndexConfiguration(broker);
    config = (indexConf == null)
            ? LuceneConfig.DEFAULT_CONFIG
            : (LuceneConfig) indexConf.getCustomIndexSpec(LuceneIndex.ID);

    getTerms();
    nodesWithMatch = new TreeMap<>();

    /* Check if an index is defined on an ancestor of the current node.
     * If yes, scan the ancestor to get the offset of the first character
     * in the current node. For example, if the indexed node is <a>abc<b>de</b></a>
     * and we query for //a[text:ngram-contains(., 'de')]/b, proxy will be a <b> node, but
     * the offsets of the matches are relative to the start of <a>.
     */
    NodeSet ancestors = null;
    for (Match current = this.match; current != null; current = current.getNextMatch()) {
        if (!proxy.getNodeId().isDescendantOf(current.getNodeId())) {
            continue;
        }
        // The set is created lazily, only once a first ancestor match is seen.
        if (ancestors == null) {
            ancestors = new NewArrayNodeSet();
        }
        ancestors.add(new NodeProxy(proxy.getOwnerDocument(), current.getNodeId()));
    }

    if (ancestors == null || ancestors.isEmpty()) {
        return;
    }
    for (final NodeProxy ancestor : ancestors) {
        scanMatches(ancestor);
    }
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) NewArrayNodeSet(org.exist.dom.persistent.NewArrayNodeSet) NewArrayNodeSet(org.exist.dom.persistent.NewArrayNodeSet) IndexSpec(org.exist.storage.IndexSpec) NodeProxy(org.exist.dom.persistent.NodeProxy) Match(org.exist.dom.persistent.Match)

Example 2 with Match

use of org.exist.dom.persistent.Match in project exist by eXist-db.

the class LuceneMatchListener method getTerms.

/**
 * Get all query terms from the original queries.
 *
 * Walks the current match chain and, for every match that belongs to the
 * Lucene index, extracts the terms of its query into a freshly created
 * {@code termMap}. Each distinct query is processed only once.
 *
 * An {@link IOException} raised while reading is logged as a warning and
 * not propagated.
 */
private void getTerms() {
    try {
        index.withReader(reader -> {
            final Set<Query> queries = new HashSet<>();
            termMap = new TreeMap<>();
            for (Match nextMatch = this.match; nextMatch != null; nextMatch = nextMatch.getNextMatch()) {
                if (nextMatch.getIndexId().equals(LuceneIndex.ID)) {
                    final Query query = ((LuceneMatch) nextMatch).getQuery();
                    // Set.add returns false for a query already seen, so its terms are extracted once only.
                    if (queries.add(query)) {
                        LuceneUtil.extractTerms(query, termMap, reader, false);
                    }
                }
            }
            return null;
        });
    } catch (final IOException e) {
        LOG.warn("Match listener caught IO exception while reading query terms: {}", e.getMessage(), e);
    }
}
Also used : Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) IOException(java.io.IOException) Match(org.exist.dom.persistent.Match)

Example 3 with Match

use of org.exist.dom.persistent.Match in project exist by eXist-db.

the class Score method eval.

/**
 * Sums the scores of all Lucene matches attached to the first item of the
 * first argument.
 *
 * @param args            the function arguments; only the first item of {@code args[0]} is read
 * @param contextSequence not read by this implementation
 * @return {@code Sequence.EMPTY_SEQUENCE} if the item is not a persistent node,
 *         otherwise a {@code FloatValue} holding the summed score
 *         (0.0 when no Lucene match is present)
 * @throws XPathException declared by the signature
 */
public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    final NodeValue firstItem = (NodeValue) args[0].itemAt(0);
    if (firstItem.getImplementationType() != NodeValue.PERSISTENT_NODE) {
        return Sequence.EMPTY_SEQUENCE;
    }

    float total = 0.0f;
    // Follow the linked chain of matches, counting only those owned by the Lucene index.
    for (Match current = ((NodeProxy) firstItem).getMatches(); current != null; current = current.getNextMatch()) {
        if (current.getIndexId().equals(LuceneIndex.ID)) {
            total += ((LuceneMatch) current).getScore();
        }
    }
    return new FloatValue(total);
}
Also used : LuceneMatch(org.exist.indexing.lucene.LuceneMatch) NodeProxy(org.exist.dom.persistent.NodeProxy) Match(org.exist.dom.persistent.Match) LuceneMatch(org.exist.indexing.lucene.LuceneMatch)

Example 4 with Match

use of org.exist.dom.persistent.Match in project exist by eXist-db.

the class NGramSearch method getContinuousMatches.

/**
 * Looks for the first pair of a match on {@code head} and a match on
 * {@code tail} for which {@code headMatch.continuedBy(tailMatch)} yields a
 * non-null result.
 *
 * When such a pair exists, the matches of {@code tail} are passed through
 * {@code NodeProxies.filterMatches} with a predicate comparing each match's
 * context id against this expression's id, and the continuous match is then
 * added to {@code tail}.
 *
 * @param head
 *            a node carrying the leading matches
 * @param tail
 *            a node carrying the trailing matches; modified and returned on success
 * @return {@code tail} with the continuous match added, or {@code null} if no
 *         head match is continued by any tail match
 */
private NodeProxy getContinuousMatches(final NodeProxy head, final NodeProxy tail) {
    Match found = null;
    // Both loops stop as soon as the first continuation has been found.
    for (Match h = head.getMatches(); h != null && found == null; h = h.getNextMatch()) {
        for (Match t = tail.getMatches(); t != null && found == null; t = t.getNextMatch()) {
            found = h.continuedBy(t);
        }
    }

    if (found == null) {
        return null;
    }

    NodeProxies.filterMatches(tail, candidate -> candidate.getContextId() != getExpressionId());
    tail.addMatch(found);
    return tail;
}
Also used : Match(org.exist.dom.persistent.Match)

Example 5 with Match

use of org.exist.dom.persistent.Match in project exist by eXist-db.

the class NGramSearch method processMatches.

/**
 * Evaluates the given query string against the n-gram index and
 * post-processes the resulting node set.
 *
 * The query is parsed with {@code parseQuery} when this function's local
 * name is {@code wildcard-contains}; otherwise it is wrapped as a
 * {@code FixedString}. If the local name starts with {@code starts-with} or
 * {@code ends-with}, the result is narrowed to nodes matching at the start
 * or at the end respectively. Finally the matches owned by this expression
 * are transformed with {@code Match::filterOutOverlappingOffsets}.
 *
 * @param index   the index worker the query is evaluated against
 * @param docs    the documents handed to the query evaluation
 * @param qnames  the qualified names handed to the query evaluation
 * @param query   the query string
 * @param nodeSet the node set handed to the query evaluation
 * @param axis    the axis handed to the query evaluation
 * @return the post-processed node set
 * @throws XPathException declared by the signature
 */
private NodeSet processMatches(NGramIndexWorker index, DocumentSet docs, List<QName> qnames, String query, NodeSet nodeSet, int axis) throws XPathException {
    final EvaluatableExpression parsedQuery = getLocalName().equals("wildcard-contains")
            ? parseQuery(query)
            : new FixedString(this, query);
    LOG.debug("Parsed Query: {}", parsedQuery);

    NodeSet matches = parsedQuery.eval(index, docs, qnames, nodeSet, axis, this.getExpressionId());

    // Anchor the result for the starts-with / ends-with function variants.
    if (getLocalName().startsWith("starts-with")) {
        matches = NodeSets.getNodesMatchingAtStart(matches, getExpressionId());
    } else if (getLocalName().startsWith("ends-with")) {
        matches = NodeSets.getNodesMatchingAtEnd(matches, getExpressionId());
    }

    return NodeSets.transformNodes(
            matches,
            node -> NodeProxies.transformOwnMatches(node, Match::filterOutOverlappingOffsets, getExpressionId()));
}
Also used : NodeSet(org.exist.dom.persistent.NodeSet) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) Match(org.exist.dom.persistent.Match) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) java.util(java.util) QName(org.exist.dom.QName) NodeProxy(org.exist.dom.persistent.NodeProxy) org.exist.xquery.value(org.exist.xquery.value) Wildcard(org.exist.xquery.modules.ngram.query.Wildcard) NodeSet(org.exist.dom.persistent.NodeSet) EmptyExpression(org.exist.xquery.modules.ngram.query.EmptyExpression) org.exist.xquery(org.exist.xquery) NodeProxies(org.exist.xquery.modules.ngram.utils.NodeProxies) NGramIndex(org.exist.indexing.ngram.NGramIndex) Matcher(java.util.regex.Matcher) NodeSets(org.exist.xquery.modules.ngram.utils.NodeSets) ElementValue(org.exist.storage.ElementValue) Error(org.exist.xquery.util.Error) DocumentSet(org.exist.dom.persistent.DocumentSet) AlternativeStrings(org.exist.xquery.modules.ngram.query.AlternativeStrings) StartAnchor(org.exist.xquery.modules.ngram.query.StartAnchor) EmptyNodeSet(org.exist.dom.persistent.EmptyNodeSet) NGramIndexWorker(org.exist.indexing.ngram.NGramIndexWorker) Logger(org.apache.logging.log4j.Logger) FixedString(org.exist.xquery.modules.ngram.query.FixedString) EndAnchor(org.exist.xquery.modules.ngram.query.EndAnchor) WildcardedExpressionSequence(org.exist.xquery.modules.ngram.query.WildcardedExpressionSequence) Pattern(java.util.regex.Pattern) WildcardedExpression(org.exist.xquery.modules.ngram.query.WildcardedExpression) LogManager(org.apache.logging.log4j.LogManager) EvaluatableExpression(org.exist.xquery.modules.ngram.query.EvaluatableExpression) FixedString(org.exist.xquery.modules.ngram.query.FixedString) Match(org.exist.dom.persistent.Match)

Aggregations

Match (org.exist.dom.persistent.Match)12 NodeProxy (org.exist.dom.persistent.NodeProxy)7 IOException (java.io.IOException)3 Query (org.apache.lucene.search.Query)2 NodeSet (org.exist.dom.persistent.NodeSet)2 LuceneMatch (org.exist.indexing.lucene.LuceneMatch)2 java.util (java.util)1 IdentityHashMap (java.util.IdentityHashMap)1 Matcher (java.util.regex.Matcher)1 Pattern (java.util.regex.Pattern)1 XMLStreamException (javax.xml.stream.XMLStreamException)1 XMLStreamReader (javax.xml.stream.XMLStreamReader)1 LogManager (org.apache.logging.log4j.LogManager)1 Logger (org.apache.logging.log4j.Logger)1 PhraseQuery (org.apache.lucene.search.PhraseQuery)1 QName (org.exist.dom.QName)1 DocumentSet (org.exist.dom.persistent.DocumentSet)1 EmptyNodeSet (org.exist.dom.persistent.EmptyNodeSet)1 NewArrayNodeSet (org.exist.dom.persistent.NewArrayNodeSet)1 NGramIndex (org.exist.indexing.ngram.NGramIndex)1