Search in sources :

Example 1 with SpanRegexQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper in project Krill by KorAP.

the class KrillQuery method _termFromJson.

/**
 * Deserialize a {@code koral:term} (or a span that is described like a
 * term) into a span query wrapper.
 *
 * <p>The term string is assembled as
 * {@code [direction][foundry/][layer:][key][:value]}. Long layer names
 * ({@code lemma}, {@code pos}, {@code orth}, {@code struct},
 * {@code const}) are mapped to their one-letter prefixes. For the surface
 * layer ({@code orth}) everything collected so far (direction and foundry)
 * is discarded and the prefix becomes {@code s:}, or {@code i:} for a
 * case-insensitive term.
 *
 * <p>Case insensitivity is requested either by the legacy
 * {@code caseInsensitive} property or by a {@code flags:caseInsensitive}
 * entry in the {@code flags} array. Unknown flags produce warning 748;
 * case insensitivity on a layer other than the surface layer produces
 * warning 767.
 *
 * <p>TODO: Not optimal as it does not respect non-term
 *
 * @param json      the JSON node to deserialize; must carry {@code @type}
 * @param isSpan    if {@code true} the node is never treated as a term,
 *                  regardless of its {@code @type}
 * @param direction optional relation direction whose {@code value()} is
 *                  prepended to the term string; may be {@code null}
 * @return for terms: a regex, wildcard or segment wrapper (wrapped in a
 *         {@code without(...)} segment for {@code match:ne}), or an empty
 *         {@code SpanRepetitionQueryWrapper} if a regex matches any token;
 *         for non-terms: a tag wrapper, passed through
 *         {@code _createElementAttrFromJson} when {@code attr} is present.
 *         May be {@code null} if {@code attr} is present and the builder
 *         yields no tag wrapper.
 * @throws QueryException 701 if {@code @type} is missing (on the node or
 *         on its {@code attr}), 740 if neither a non-empty {@code key} nor
 *         an {@code attr} is given, 741 if {@code match} is neither
 *         {@code match:eq} nor {@code match:ne}
 */
private SpanQueryWrapper _termFromJson(JsonNode json, boolean isSpan, RelationDirection direction) throws QueryException {
    if (!json.has("@type")) {
        throw new QueryException(701, "JSON-LD group has no @type attribute");
    }

    // Empty koral:span hack: a span is never handled as a term
    final boolean isTerm = !isSpan && json.get("@type").asText().equals("koral:term");

    // A key may only be omitted if an attribute is given instead
    final boolean hasKey = json.has("key") && json.get("key").asText().length() > 0;
    if (!hasKey && !json.has("attr")) {
        throw new QueryException(740, "Key definition is missing in term or span");
    }

    boolean isCaseInsensitive = false;
    if (json.has("caseInsensitive") && json.get("caseInsensitive").asBoolean()) {
        // <legacy>
        isCaseInsensitive = true;
    } else if (json.has("flags") && json.get("flags").isArray()) {
        // Flags
        Iterator<JsonNode> flags = json.get("flags").elements();
        while (flags.hasNext()) {
            String flag = flags.next().asText();
            if (flag.equals("flags:caseInsensitive")) {
                isCaseInsensitive = true;
            } else {
                this.addWarning(748, "Flag is unknown", flag);
            }
        }
    }

    StringBuilder value = new StringBuilder();
    if (direction != null) {
        value.append(direction.value());
    }
    if (json.has("foundry") && json.get("foundry").asText().length() > 0) {
        value.append(json.get("foundry").asText()).append('/');
    }

    // No default foundry defined
    if (json.has("layer") && json.get("layer").asText().length() > 0) {
        String layer = json.get("layer").asText();
        switch (layer) {
            case "lemma":
                layer = "l";
                break;
            case "pos":
                layer = "p";
                break;
            case "orth":
                // TODO: THIS IS AN UGLY HACK! AND SHOULD BE NAMED "SURFACE" or . OR *
                layer = ".";
                break;
            case "struct":
                layer = "s";
                break;
            case "const":
                layer = "c";
                break;
        }

        if (isCaseInsensitive && isTerm) {
            if (layer.equals(".")) {
                layer = "i";
            } else {
                this.addWarning(767, "Case insensitivity is currently not supported for this layer");
            }
        }

        // Ignore foundry for orth layer: drop everything collected so far
        if (layer.equals(".")) {
            layer = "s";
            value.setLength(0);
        } else if (layer.equals("i")) {
            value.setLength(0);
        }
        value.append(layer).append(':');
    }

    if (hasKey) {
        String key = json.get("key").asText();
        // Fold case independently of the JVM default locale, so that e.g.
        // "I" always becomes "i" (and not a dotless i on Turkish systems)
        value.append(isCaseInsensitive ? key.toLowerCase(java.util.Locale.ROOT) : key);
    }

    if (json.has("value") && json.get("value").asText().length() > 0) {
        value.append(':').append(json.get("value").asText());
    }

    // Regular expression, wildcard or plain string term
    if (isTerm) {
        String match = "match:eq";
        if (json.has("match")) {
            match = json.get("match").asText();
        }

        if (json.has("type")) {
            QueryBuilder qb = this.builder();

            // Branch on type
            switch (json.get("type").asText()) {
                case "type:regex": {
                    // The regex can be rewritten to an any token
                    if (value.toString().matches("^[si]:\\.[\\+\\*]\\??$")) {
                        return new SpanRepetitionQueryWrapper();
                    }

                    SpanRegexQueryWrapper srqw = qb.re(value.toString(), isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG)
                            log.trace("Term is negated");
                        return this.builder().seg().without(srqw);
                    } else if (match.equals("match:eq")) {
                        return srqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:wildcard": {
                    SpanWildcardQueryWrapper swcqw = qb.wc(value.toString(), isCaseInsensitive);
                    if (match.equals("match:ne")) {
                        if (DEBUG)
                            log.trace("Term is negated");
                        return this.builder().seg().without(swcqw);
                    } else if (match.equals("match:eq")) {
                        return swcqw;
                    }
                    throw new QueryException(741, "Match relation unknown");
                }
                case "type:string":
                    break;
                default:
                    // Unknown types fall through to plain string handling
                    this.addWarning(746, "Term type is not supported - treated as a string");
            }
        }

        SpanSegmentQueryWrapper ssqw = this.builder().seg(value.toString());
        if (match.equals("match:ne")) {
            if (DEBUG)
                log.trace("Term is negated");
            ssqw.makeNegative();
            return this.builder().seg().without(ssqw);
        } else if (match.equals("match:eq")) {
            return ssqw;
        } else {
            throw new QueryException(741, "Match relation unknown");
        }
    }

    // Not a term: build an element (tag) query, optionally with attributes
    if (json.has("attr")) {
        JsonNode attrNode = json.get("attr");
        if (!attrNode.has("@type")) {
            throw new QueryException(701, "JSON-LD group has no @type attribute");
        }
        if (value.length() == 0) {
            // No element given - attributes only
            return _createElementAttrFromJson(null, json, attrNode);
        }
        SpanQueryWrapper elementWithIdWrapper = this.builder().tag(value.toString());
        if (elementWithIdWrapper == null) {
            return null;
        }
        return _createElementAttrFromJson(elementWithIdWrapper, json, attrNode);
    }

    return this.builder().tag(value.toString());
}
Also used : SpanRepetitionQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper) SpanWildcardQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper) QueryException(de.ids_mannheim.korap.util.QueryException) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) Iterator(java.util.Iterator) JsonNode(com.fasterxml.jackson.databind.JsonNode) QueryBuilder(de.ids_mannheim.korap.query.QueryBuilder) SpanQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)

Example 2 with SpanRegexQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper in project Krill by KorAP.

the class TestSpanAlterQuery method spanAlterQuery5.

/**
 * An alternation mixing two plain terms with a regular expression must
 * serialize to a spanOr that keeps the members in insertion order.
 */
@Test
public void spanAlterQuery5() throws QueryException {
    final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "a[bc]d.?e");
    final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field");

    // term | regex | term
    alternation.or("f").or(regex).or("g");

    final String expected = "spanOr([field:f, SpanMultiTermQueryWrapper(field:/a[bc]d.?e/), field:g])";
    assertEquals(expected, alternation.toQuery().toString());
}
Also used : SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanAlterQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper) Test(org.junit.Test)

Example 3 with SpanRegexQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper in project Krill by KorAP.

the class TestSpanSegmentQuery method spanSegmentAlterQuery.

/**
 * A segment starts out without a query, then grows with a plain term,
 * an alternation and a regular expression; checks the serialized form
 * after the first and the last addition.
 */
@Test
public void spanSegmentAlterQuery() throws QueryException {
    final SpanSegmentQueryWrapper segment = new SpanSegmentQueryWrapper("field");

    // Nothing added yet - no query
    assertNull(segment.toQuery());

    // Single term
    segment.with("a");
    final String afterTerm = segment.toQuery().toString();
    assertEquals("field:a", afterTerm);

    // Add an alternation, then a regular expression
    final SpanAlterQueryWrapper alternation = new SpanAlterQueryWrapper("field", "c", "d");
    segment.with(alternation);
    final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "a.*b");
    segment.with(regex);

    final String afterAll = segment.toQuery().toString();
    assertEquals("spanSegment(spanSegment(field:a, spanOr([field:c, field:d])), SpanMultiTermQueryWrapper(field:/a.*b/))", afterAll);
}
Also used : SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) SpanAlterQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper) Test(org.junit.Test)

Example 4 with SpanRegexQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper in project Krill by KorAP.

the class TestSpanSegmentSequenceQuery method spanSegmentSequenceQueryPrepend3.

/**
 * Prepending a regular expression to a two-term sequence must place the
 * regex at the front of the nested spanNext chain.
 */
@Test
public void spanSegmentSequenceQueryPrepend3() throws QueryException {
    final SpanSequenceQueryWrapper sequence = new SpanSequenceQueryWrapper("field", "c", "d");
    final SpanRegexQueryWrapper regex = new SpanRegexQueryWrapper("field", "a.?b");

    sequence.prepend(regex);

    final String serialized = sequence.toQuery().toString();
    assertEquals("spanNext(spanNext(SpanMultiTermQueryWrapper(field:/a.?b/), field:c), field:d)", serialized);
}
Also used : SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSequenceQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper) Test(org.junit.Test)

Example 5 with SpanRegexQueryWrapper

use of de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper in project Krill by KorAP.

the class TestIndex method indexLucene.

/**
 * End-to-end check of indexing and searching.
 *
 * <p>Writes the documents returned by {@code initIndexer()} into
 * {@code index} via {@code addDoc}, reopens the index for reading, and
 * then asserts hit counts for a series of term, boolean, regexp,
 * wildcard and span queries against the {@code textClass} and
 * {@code text} fields. Finally iterates the spans of a SpanNearQuery
 * per leaf reader and asserts that no payload text was collected.
 *
 * <p>The expected hit counts depend on the fixture documents, which are
 * defined outside this method.
 */
@Test
public void indexLucene() throws Exception {
    // Base analyzer for searching and indexing
    StandardAnalyzer analyzer = new StandardAnalyzer();
    // Based on
    // http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/
    // analysis/Analyzer.html?is-external=true
    // Create configuration with base analyzer
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // Add all fixture documents with the correct fields
    IndexWriter w = new IndexWriter(index, config);
    // Raw Collection - hence the unchecked cast of its iterator below
    Collection docs = initIndexer();
    @SuppressWarnings("unchecked") Iterator<Map<String, String>> i = (Iterator<Map<String, String>>) docs.iterator();
    for (; i.hasNext(); ) {
        addDoc(w, i.next());
    }
    ;
    assertEquals(3, w.numDocs());
    w.close();
    // Check directory: reader must see exactly the documents added
    DirectoryReader reader = DirectoryReader.open(index);
    assertEquals(docs.size(), reader.maxDoc());
    assertEquals(docs.size(), reader.numDocs());
    // Check searcher
    IndexSearcher searcher = new IndexSearcher(reader);
    // textClass
    // All texts of text class "news"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "news")), 10).totalHits);
    // textClass
    // All texts of text class "sports"
    assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "sports")), 10).totalHits);
    // TextIndex
    // All docs containing "l:nehmen"
    assertEquals(1, searcher.search(new TermQuery(new Term("text", "l:nehmen")), 10).totalHits);
    // TextIndex
    // All docs containing "s:den"
    assertEquals(2, searcher.search(new TermQuery(new Term("text", "s:den")), 10).totalHits);
    /*
        assertEquals(3,
              searcher.search(
                new TermQuery(
                  new Term("text", "T")
            ), 10
          ).totalHits
            );
        */
    // BooleanQuery
    // All docs containing "s:den" and "l:sie"
    TermQuery s_den = new TermQuery(new Term("text", "s:den"));
    TermQuery l_sie = new TermQuery(new Term("text", "l:sie"));
    BooleanQuery bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.MUST);
    bool.add(l_sie, BooleanClause.Occur.MUST);
    assertEquals(1, searcher.search(bool, 10).totalHits);
    // BooleanQuery
    // All docs containing "s:den" or "l:sie"
    bool = new BooleanQuery();
    bool.add(s_den, BooleanClause.Occur.SHOULD);
    bool.add(l_sie, BooleanClause.Occur.SHOULD);
    assertEquals(2, searcher.search(bool, 10).totalHits);
    // RegexpQuery
    // All docs containing ".{4}en" (liefen and Hunden)
    RegexpQuery srquery = new RegexpQuery(new Term("text", "s:.{4}en"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // RegexpQuery
    // All docs containing "E." (Er) (2x)
    srquery = new RegexpQuery(new Term("text", "s:E."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // Same expression through the span wrapper must yield the same count
    SpanRegexQueryWrapper ssrquery = new SpanRegexQueryWrapper("text", "s:E.");
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "e." (er) (0x) - case sensitive, so no match
    srquery = new RegexpQuery(new Term("text", "s:e."));
    assertEquals(0, searcher.search(srquery, 10).totalHits);
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.");
    assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // RegexpQuery
    // All docs containing "E."/i ([Ee]r) (2x)
    srquery = new RegexpQuery(new Term("text", "i:e."));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // With the case-insensitive flag the wrapper serializes "s:e." as "i:e."
    ssrquery = new SpanRegexQueryWrapper("text", "s:e.", true);
    assertEquals("SpanMultiTermQueryWrapper(text:/i:e./)", ssrquery.toQuery().toString());
    assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
    // All docs containing "ng"/x (Angst) (2x)
    srquery = new RegexpQuery(new Term("text", "s:.*ng.*"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
    // for Carstens question on wildcards
    // Wildcardquery
    // All docs containing "*ng*" as a wildcard pattern
    WildcardQuery swquery = new WildcardQuery(new Term("text", "s:*ng*"));
    assertEquals("text:s:*ng*", swquery.toString());
    assertEquals(2, searcher.search(swquery, 10).totalHits);
    // [base=angst]
    // NOTE(review): stq is constructed but never searched - the assertion
    // below re-runs srquery ("s:.*ng.*") instead. Confirm whether stq was
    // intended here.
    SpanTermQuery stq = new SpanTermQuery(new Term("text", "l:angst"));
    assertEquals(2, searcher.search(srquery, 10).totalHits);
    // vor Angst
    // [orth=vor][orth=Angst]
    SpanNearQuery snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "s:vor")), new SpanTermQuery(new Term("text", "s:Angst")) }, 1, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN][m:acc:sg:masc]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:c:acc")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:g:masc")) }, -1, false) }, // slop
    -1, // inOrder
    false) // new SpanTermQuery(new Term("text", "m:-acc:--sg:masc"))
    }, // slop
    0, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN|m:3:sg:past:ind]
    // Exact match!
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:p:3")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, -1, false) }, -1, false) }, -1, false) }, // slop
    -1, // inOrder
    false);
    assertEquals(2, searcher.search(snquery, 10).totalHits);
    // To make sure, this is not equal:
    // Spannearquery [p:VVFIN & m:3:sg:past:ind]
    // Exact match!
    // Maybe it IS equal
    // The flat (non-nested) variant is asserted NOT to yield 2 hits
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanTermQuery(new Term("text", "m:p:3")), new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, // slop
    -1, // inOrder
    false);
    assertNotEquals(2, searcher.search(snquery, 10).totalHits);
    // assertEquals(2, searcher.search(snquery, 10).totalHits);
    // Spannearquery [p:VVFIN & m:3:sg & past:ind]
    SpanSegmentQueryWrapper sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:n:sg", "m:t:past", "m:m:ind");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // Todo:
    /*
        sniquery = new SpanSegmentQuery(
              "text",
          "p:VVFIN",
          "m:p:3",
          "m:n:sg",
          "m:t:past",
          "m:m:ind"
            );
        */
    // Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() }, // slop
    5, // inOrder
    true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Same segment terms as above in a different order - same hit count expected
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past", "m:m:ind", "m:n:sg");
    assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
    // [p = VVFIN & m:p = 3 & m:t = past & m:n != pl] or
    // [p = VVFIN & m:p = 3 & m:t = past & !m:n = pl]
    // TODO: Problem: What should happen in case the category does not exist?
    // possible solution: & ( m:n != pl & exists(m:n))
    sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past");
    SpanQuery snqquery = new SpanNotQuery(sniquery.toQuery(), new SpanTermQuery(new Term("text", "m:n:pl")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & (m:c: = dat | m:c = acc)]
    // NOTE(review): the query built here uses m:c:nom (not dat), and the
    // assertion below searches snqquery from the previous step, not the
    // snquery assigned here. Confirm which query was meant to be checked.
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))) }, -1, false);
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c: = nom | m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))));
    assertEquals(1, searcher.search(snqquery, 10).totalHits);
    // [p = NN & !(m:c = nom)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:nom")));
    assertEquals(3, searcher.search(snqquery, 10).totalHits);
    // [p=NN & !(m:c = acc)]
    snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:acc")));
    assertEquals(2, searcher.search(snqquery, 10).totalHits);
    // [p=PPER][][p=ART]
    // The term "T" is used in place of the empty token []
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:PPER")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "p:ART")) }, 0, true) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    // Todo:
    // [orth=się][]{2,4}[base=bać]
    // [orth=się][orth!="[.!?,:]"]{,5}[base=bać]|[base=bać][base="on|ja|ty|my|wy"]?[orth=się]
    // [pos=subst & orth="a.*"]{2}
    // [tag=subst:sg:nom:n]
    // [case==acc & case==gen] ??
    // [case~acc & case~gen]
    // [case~~acc]
    // [base=bać][orth!=się]+[orth=się] within s
    // [][][p:VAFIN] within s
    // [][p:VAFIN] within s
    // [][][p:VAFIN]
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    /*
        http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene
        */
    // Walk all spans of the query per leaf reader and concatenate any
    // payloads that are reported as available
    StringBuilder payloadString = new StringBuilder();
    Map<Term, TermContext> termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            // Index-wide document id; computed but not used further here
            int docid = atomic.docBase + spans.doc();
            if (spans.isPayloadAvailable()) {
                for (byte[] payload : spans.getPayload()) {
                    /* retrieve payload for current matching span */
                    // NOTE(review): new String(byte[]) takes no explicit
                    // charset - consider passing one
                    payloadString.append(new String(payload));
                    payloadString.append(" | ");
                }
                ;
            }
            ;
        }
        ;
    }
    ;
    // assertEquals(33, payloadString.length());
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN]
    // without collecting payloads
    snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true, false), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true, false);
    assertEquals(1, searcher.search(snquery, 10).totalHits);
    payloadString = new StringBuilder();
    termContexts = new HashMap<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = snquery.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            // Index-wide document id; computed but not used further here
            int docid = atomic.docBase + spans.doc();
            // Unlike the loop above, getPayload() is called without an
            // isPayloadAvailable() guard
            for (byte[] payload : spans.getPayload()) {
                /* retrieve payload for current matching span */
                payloadString.append(new String(payload));
                payloadString.append(" | ");
            }
            ;
        }
        ;
    }
    ;
    assertEquals(0, payloadString.length());
    // [][][p:VAFIN] in s
    // ([e:s:<][]*[T] | [T & e:s:<]) [T] ([p:VAFIN & e:s:>] | [T][]*[e:s:>]
    /*
        
        SpanSegmentWithinQuery ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "T", "p:VAFIN")
            );
        assertEquals(0, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	/// retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
            };
        };
        assertEquals(0, payloadString.length(), 1);
        
        ssequery = new SpanSegmentWithinQuery(
            "text","s", new SpanSegmentSequenceQuery("text", "T", "p:VAFIN")
            );
        
        assertEquals("for " + ssequery.toQuery(),
        	     1, searcher.search(ssequery.toQuery(), 10).totalHits);
        
        payloadString = new StringBuilder();
        termContexts = new HashMap<>();
        for (LeafReaderContext atomic : reader.leaves()) {
            Bits bitset = atomic.reader().getLiveDocs();
            // Spans spans = NearSpansOrdered();
            Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
        
            while (spans.next()) {
        	int docid = atomic.docBase + spans.doc();
        	for (byte[] payload : spans.getPayload()) {
        	    // retrieve payload for current matching span
        	    payloadString.append(new String(payload));
        	    payloadString.append(" | ");
        	};
        	fail("Doc: " + docid + " with " + spans.start() + "-" + spans.end() + " || " + payloadString.toString());
            };
        };
        assertEquals(20, payloadString.length());
        
        */
    // --------------------______>
    // Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), ssequery.toQuery());
    /*
        TopDocs topDocs = is.search(snq, 1);
        Set<String> payloadSet = new HashSet<String>();
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
          while (spans.next()) {
            Collection<byte[]> payloads = spans.getPayload();
        
            for (final byte [] payload : payloads) {
              payloadSet.add(new String(payload, "UTF-8"));
            }
          }
        }
        */
    /*
        Alternative:
        IndexReader reader = writer.getReader();
        writer.close();
        IndexSearcher searcher = newSearcher(reader);
        
        PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
        
        Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
        if(VERBOSE)
          System.out.println("Num payloads:" + payloads.size());
        for (final byte [] bytes : payloads) {
          if(VERBOSE)
            System.out.println(new String(bytes, "UTF-8"));
        }
        */
    /* new: */
    // PayloadHelper helper = new PayloadHelper();
    // Map<Term, TermContext> termContexts = new HashMap<>();
    // Spans spans;
    // spans = snquery.getSpans(searcher.getIndexReader());
    // searcher = helper.setUp(similarity, 1000);
    /*
        IndexReader reader = search.getReader(querycontainer.getFoundry());
        Spans luceneSpans;
        Bits bitset = atomic.reader().getLiveDocs();
        for (byte[] payload : luceneSpans.getPayload())
        
        /* Iterate over all matching documents */
    /*
            while (luceneSpans.next() && total < config.getMaxhits()) {
        	Span matchSpan;
        	StringBuilder payloadString = new StringBuilder();
        	int docid = atomic.docBase + luceneSpans.doc();
        	String docname = search.retrieveDocname(docid,
        					querycontainer.getFoundry());
        					total++;
        
        	for (byte[] payload : luceneSpans.getPayload())
        */
    /* retrieve payload for current matching span */
    // payloadString.append(new String(payload));
    /* create span containing result */
    /*
        		matchSpan = new Span(docname);
        		matchSpan.setIndexdocid(docid);
        		matchSpan.setLayer(querycontainer.getLayer());
        		matchSpan.storePayloads(payloadString.toString());
        		matchSpans.add(matchSpan);
        */
    /*
         * topdocs = searcher.search(new ConstantScoreQuery(corpusQ add
         * position to list of positions to be considered for later
         * searches
         */
    /*
        validValues.put(docname,
        		matchSpan.getPayload(config.getPrefix()));
        }
        */
    // Todo: API made by add() typed for queries, strings
    // SpanPayloadCheckQuery for sentences!
    /* Support regular expression in SpanSegmentQuery */
    // new Regexp();
    // new Term();
    /*
          Maybe: spanSegmentQuery(new Term(), new Wildcard(), new Regex());
         */
    // And Not ->
    // SpanTermDiffQuery
    /*
        SpanNearQuery poquery = new SpanNearQuery(
        
        );
        */
    // NOTE(review): the reader is only closed on the success path; a
    // failing assertion above leaves it open
    reader.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermContext(org.apache.lucene.index.TermContext) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) Spans(org.apache.lucene.search.spans.Spans) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) MultiTerm(de.ids_mannheim.korap.index.MultiTerm) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanRegexQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper) SpanSegmentQueryWrapper(de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper) IndexWriter(org.apache.lucene.index.IndexWriter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Bits(org.apache.lucene.util.Bits) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Test(org.junit.Test) Test(de.ids_mannheim.korap.Test)

Aggregations

SpanRegexQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRegexQueryWrapper)9 Test (org.junit.Test)7 SpanSegmentQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSegmentQueryWrapper)5 SpanAlterQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanAlterQueryWrapper)4 JsonNode (com.fasterxml.jackson.databind.JsonNode)2 SpanQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanQueryWrapper)2 SpanSequenceQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanSequenceQueryWrapper)2 QueryException (de.ids_mannheim.korap.util.QueryException)2 Test (de.ids_mannheim.korap.Test)1 MultiTerm (de.ids_mannheim.korap.index.MultiTerm)1 QueryBuilder (de.ids_mannheim.korap.query.QueryBuilder)1 SpanRepetitionQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanRepetitionQueryWrapper)1 SpanWildcardQueryWrapper (de.ids_mannheim.korap.query.wrap.SpanWildcardQueryWrapper)1 Iterator (java.util.Iterator)1 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)1 DirectoryReader (org.apache.lucene.index.DirectoryReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)1 Term (org.apache.lucene.index.Term)1