use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.
the class TestElementIndex method indexExample3.
/**
 * Indexes six documents (four with nested {@code <a>} elements, two without
 * any elements) and checks that a {@link SpanElementQuery} for {@code a} on
 * field {@code base} reports twelve matches with the expected start and end
 * positions, in result order.
 *
 * In the fixtures below, each expected endPos equals the third {@code <i>}
 * value of the corresponding {@code <>:a} payload.
 * NOTE(review): presumably that value is the element's end token position -
 * confirm against the term-vector format used by FieldDocument.addTV.
 *
 * @throws IOException declared by the test signature; propagated from the
 *         index calls used here.
 */
@Test
public void indexExample3() throws IOException {
    KrillIndex ki = new KrillIndex();

    // <a><a><a>u</a></a></a>
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "xyz",
            "[(0-3)s:xyz|<>:a$<b>64<i>0<i>3<i>0<b>0|"
                    + "<>:a$<b>64<i>0<i>3<i>0<b>0|"
                    + "<>:a$<b>64<i>0<i>3<i>0<b>0|<>:b$<b>64<i>0<i>3<i>0<b>0]");
    ki.addDoc(fd);

    // <a><b>x<a>y<a>zcde</a>cde</a>cde</b></a>
    // NOTE(review): the third <>:a payload has no trailing <b>0 while the
    // following token carries one ("s:c<b>0") - confirm this is intended.
    fd = new FieldDocument();
    fd.addTV("base", "x y z c d e c d e c d e ",
            "[(0-3)s:x|<>:a$<b>64<i>0<i>36<i>12<b>0|<>:b$<b>64<i>0<i>36<i>12<b>0]"
                    + "[(3-6)s:y|<>:a$<b>64<i>3<i>27<i>9<b>0]"
                    + "[(6-9)s:z|<>:a$<b>64<i>6<i>18<i>6]"
                    + "[(9-12)s:c<b>0]"
                    + "[(12-15)s:d]"
                    + "[(15-18)s:e]"
                    + "[(18-21)s:c]"
                    + "[(21-24)s:d]"
                    + "[(24-27)s:e]"
                    + "[(27-30)s:c]"
                    + "[(30-33)s:d]"
                    + "[(33-36)s:e]");
    ki.addDoc(fd);

    // xyz (no elements)
    fd = new FieldDocument();
    fd.addTV("base", "x y z ",
            "[(0-3)s:x]" + "[(3-6)s:y]" + "[(6-9)s:z]");
    ki.addDoc(fd);

    // <a>x<a><b>y<a>zcde</a>cde</b></a>cde</a>
    fd = new FieldDocument();
    fd.addTV("base", "x y z k l m k l m k l m ",
            "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
                    + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0|<>:b$<b>64<i>3<i>6<i>9<b>0]"
                    + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6<b>0]"
                    + "[(9-12)s:k<b>0]"
                    + "[(12-15)s:l]"
                    + "[(15-18)s:m]"
                    + "[(18-21)s:k]"
                    + "[(21-24)s:l]"
                    + "[(24-27)s:m]"
                    + "[(27-30)s:k]"
                    + "[(30-33)s:l]"
                    + "[(33-36)s:m]");
    ki.addDoc(fd);

    // <a><a><a>h</a>hhij</a>hij</a>hij</a>
    fd = new FieldDocument();
    fd.addTV("base", "h h i j h i j h i j ",
            "[(0-3)s:h|"
                    + "<>:a$<b>64<i>0<i>18<i>3<b>0|"
                    + "<>:a$<b>64<i>0<i>27<i>6<b>0|"
                    + "<>:a$<b>64<i>0<i>36<i>9<b>0]"
                    + "[(3-6)s:h]"
                    + "[(12-15)s:i]"
                    + "[(15-18)s:j]"
                    + "[(18-21)s:h]"
                    + "[(21-24)s:i]"
                    + "[(24-27)s:j]"
                    + "[(27-30)s:h]"
                    + "[(30-33)s:i]"
                    + "[(33-36)s:j]");
    ki.addDoc(fd);

    // abc (no elements)
    fd = new FieldDocument();
    fd.addTV("base", "a b c ",
            "[(0-3)s:a]" + "[(3-6)s:b]" + "[(6-9)s:c]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();
    assertEquals(6, ki.numberOf("documents"));

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 15);

    // Expected value goes first so a failure reports "expected 12 but was ..."
    assertEquals("totalResults", 12, kr.getTotalResults());

    // Expected {startPos, endPos} of every match, in result order
    int[][] expected = {
        // first document: three <a> elements with end value 0
        { 0, 0 }, { 0, 0 }, { 0, 0 },
        // second document
        { 0, 12 }, { 1, 9 }, { 2, 6 },
        // fourth document (the third has no elements)
        { 0, 12 }, { 1, 9 }, { 2, 6 },
        // fifth document: three <a> elements starting at the same token
        { 0, 3 }, { 0, 6 }, { 0, 9 }
    };
    for (int i = 0; i < expected.length; i++) {
        assertEquals("StartPos (" + i + ")", expected[i][0],
                kr.getMatch(i).startPos);
        assertEquals("EndPos (" + i + ")", expected[i][1],
                kr.getMatch(i).endPos);
    }
}
use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.
the class TestElementIndex method indexExample2.
/**
 * Indexes a single document whose first token carries three {@code <a>}
 * elements and checks that a {@link SpanElementQuery} for {@code a} on field
 * {@code base} reports three matches, all starting at position 0, with end
 * positions 3, 6 and 9.
 *
 * @throws IOException declared by the test signature; propagated from the
 *         index calls used here.
 */
@Test
public void indexExample2() throws IOException {
    KrillIndex ki = new KrillIndex();

    // <a><a><a>h</a>hhij</a>hij</a>hij</a>
    // NOTE(review): the third <>:a payload has no trailing <b>0 while a
    // later token carries one ("s:i<b>0") - confirm this is intended.
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "h h i j h i j h i j ",
            "[(0-3)s:h|"
                    + "<>:a$<b>64<i>0<i>18<i>3<b>0|"
                    + "<>:a$<b>64<i>0<i>27<i>6<b>0|"
                    + "<>:a$<b>64<i>0<i>36<i>9]"
                    + "[(3-6)s:h]"
                    + "[(12-15)s:i<b>0]"
                    + "[(15-18)s:j]"
                    + "[(18-21)s:h]"
                    + "[(21-24)s:i]"
                    + "[(24-27)s:j]"
                    + "[(27-30)s:h]"
                    + "[(30-33)s:i]"
                    + "[(33-36)s:j]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();
    assertEquals(1, ki.numberOf("documents"));

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 10);

    // Expected value goes first so a failure reports "expected 3 but was ..."
    assertEquals("totalResults", 3, kr.getTotalResults());

    // Expected {startPos, endPos} of every match, in result order
    int[][] expected = { { 0, 3 }, { 0, 6 }, { 0, 9 } };
    for (int i = 0; i < expected.length; i++) {
        assertEquals("StartPos (" + i + ")", expected[i][0],
                kr.getMatch(i).startPos);
        assertEquals("EndPos (" + i + ")", expected[i][1],
                kr.getMatch(i).endPos);
    }
}
use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.
the class TestElementIndex method indexExample1.
// TODO: Store the primary data separately, as a non-indexed field.
/**
 * Indexes two documents that each contain three {@code <a>} elements
 * starting at tokens 0, 1 and 2, and checks that a {@link SpanElementQuery}
 * for {@code a} on field {@code base} reports six matches: the same three
 * start/end pairs once per document.
 *
 * @throws IOException declared by the test signature; propagated from the
 *         index calls used here.
 */
@Test
public void indexExample1() throws IOException {
    KrillIndex ki = new KrillIndex();

    // <a>x<a>y<a>zhij</a>hij</a>hij</a>hij</a>
    // NOTE(review): in both documents the third <>:a payload has no trailing
    // <b>0 while the following token carries one - confirm this is intended.
    FieldDocument fd = new FieldDocument();
    fd.addTV("base", "x y z h i j h i j h i j ",
            "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
                    + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]"
                    + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]"
                    + "[(9-12)s:h<b>0]"
                    + "[(12-15)s:i]"
                    + "[(15-18)s:j]"
                    + "[(18-21)s:h]"
                    + "[(21-24)s:i]"
                    + "[(24-27)s:j]"
                    + "[(27-30)s:h]"
                    + "[(30-33)s:i]"
                    + "[(33-36)s:j]");
    ki.addDoc(fd);

    // <a>x<a>y<a>zcde</a>cde</a>cde</a>cde</a>
    fd = new FieldDocument();
    fd.addTV("base", "x y z c d e c d e c d e ",
            "[(0-3)s:x|<>:a$<b>64<i>0<i>3<i>12<b>0]"
                    + "[(3-6)s:y|<>:a$<b>64<i>3<i>6<i>9<b>0]"
                    + "[(6-9)s:z|<>:a$<b>64<i>6<i>9<i>6]"
                    + "[(9-12)s:c<b>0]"
                    + "[(12-15)s:d]"
                    + "[(15-18)s:e]"
                    + "[(18-21)s:c]"
                    + "[(21-24)s:d]"
                    + "[(24-27)s:e]"
                    + "[(27-30)s:c]"
                    + "[(30-33)s:d]"
                    + "[(33-36)s:e]");
    ki.addDoc(fd);

    // Save documents
    ki.commit();
    assertEquals(2, ki.numberOf("documents"));

    SpanQuery sq = new SpanElementQuery("base", "a");
    Result kr = ki.search(sq, (short) 10);

    // Expected value goes first so a failure reports "expected 6 but was ..."
    assertEquals("totalResults", 6, kr.getTotalResults());

    // Expected {startPos, endPos} of every match, in result order.
    // The loop index is used in the message so each failure names the
    // match that was actually inspected.
    int[][] expected = {
        // first document
        { 0, 12 }, { 1, 9 }, { 2, 6 },
        // second document
        { 0, 12 }, { 1, 9 }, { 2, 6 }
    };
    for (int i = 0; i < expected.length; i++) {
        assertEquals("StartPos (" + i + ")", expected[i][0],
                kr.getMatch(i).startPos);
        assertEquals("EndPos (" + i + ")", expected[i][1],
                kr.getMatch(i).endPos);
    }
}
use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.
the class Match method expandContextToSpan.
/**
 * Expand the context to a span.
 *
 * Runs a {@link SpanElementQuery} for {@code element} on {@code field} and
 * walks its spans in this match's document ({@code this.localDocID}),
 * looking for spans that surround the match start and the match end.
 *
 * TODO: THIS IS NOT VERY CLEVER - MAKE IT MORE CLEVER!
 *
 * @param atomic  leaf reader context handed to {@code getSpans}
 * @param bitset  bits handed to {@code getSpans}
 * @param field   field the element query is built on
 * @param element element name the query is built on
 * @return an array {@code {newStart, newEnd, newStartChar, newEndChar}};
 *         each entry keeps its initial value -1 unless the loop assigned it
 *         (newEndChar may also be reset to 0), and all four are -1 when an
 *         IOException was caught
 */
public int[] expandContextToSpan(LeafReaderContext atomic, Bits bitset, String field, String element) {
try {
// Store character offsets in ByteBuffer
// (scratch buffer of 24 bytes, reused for every payload read below)
ByteBuffer bb = ByteBuffer.allocate(24);
SpanElementQuery cquery = new SpanElementQuery(field, element);
Spans contextSpans = cquery.getSpans(atomic, bitset, new HashMap<Term, TermContext>());
// Token positions of the expanded context; -1 means "not found"
int newStart = -1, newEnd = -1;
// Values read from the span payload; -1 means "not read"
int newStartChar = -1, newEndChar = -1;
if (DEBUG)
log.trace("Extend match to context boundary with {} in docID {}", cquery.toString(), this.localDocID);
while (true) {
// Game over
if (contextSpans.next() != true)
break;
// Spans are in another document: jump to the match's document and
// give up if it has no such span
if (contextSpans.doc() != this.localDocID) {
contextSpans.skipTo(this.localDocID);
if (contextSpans.doc() != this.localDocID)
break;
}
;
// if it's closer to the match than everything before
// (i.e. the span covers the match start position)
if (contextSpans.start() <= this.getStartPos() && contextSpans.end() >= this.getStartPos()) {
// Set as newStart
// (keeps the largest start seen among the covering spans)
newStart = contextSpans.start() > newStart ? contextSpans.start() : newStart;
if (DEBUG)
log.trace("NewStart is at {}", newStart);
// Get character offset (start)
// Note: both char values are overwritten from this span's payload even
// when newStart itself was not changed by the line above.
if (contextSpans.isPayloadAvailable()) {
try {
bb.rewind();
for (byte[] b : contextSpans.getPayload()) {
// Not an element span
if (b[0] != (byte) 64)
continue;
bb.rewind();
bb.put(b);
// Skip the leading type byte, then read two consecutive ints
// (buffer offsets 1 and 5)
bb.position(1);
newStartChar = bb.getInt();
newEndChar = bb.getInt();
// Only the first payload with type byte 64 is used
break;
}
;
} catch (Exception e) {
// Best effort: a failed payload read is only logged
log.warn("Some problems with ByteBuffer: {}", e.getMessage());
}
;
}
;
} else {
// Has to be reset to avoid multiple readings of the payload
// (0 is the marker that allows the end-offset read below)
newEndChar = 0;
}
;
// There's an s found, that ends after the match
if (contextSpans.end() >= this.getEndPos()) {
newEnd = contextSpans.end();
// Get character offset (end)
// Only read when newEndChar is 0 at this point.
if (newEndChar == 0 && contextSpans.isPayloadAvailable()) {
try {
bb.rewind();
for (byte[] b : contextSpans.getPayload()) {
// Not an element span
if (b[0] != (byte) 64)
continue;
bb.rewind();
bb.put(b);
bb.position(1);
// NOTE(review): absolute read at offset 1, i.e. the same offset
// newStartChar is read from above, not offset 5 where the first
// branch reads newEndChar - confirm this is intended.
newEndChar = bb.getInt(1);
break;
}
;
} catch (Exception e) {
log.warn(e.getMessage());
}
;
}
;
// The first span reaching the match end terminates the search
break;
}
;
}
;
// We have a new match surrounding
if (DEBUG)
log.trace("New match spans from {}-{}/{}-{}", newStart, newEnd, newStartChar, newEndChar);
return new int[] { newStart, newEnd, newStartChar, newEndChar };
} catch (IOException e) {
// Logged only; the caller gets the all -1 fallback below
log.error(e.getMessage());
}
;
return new int[] { -1, -1, -1, -1 };
}
use of de.ids_mannheim.korap.query.SpanElementQuery in project Krill by KorAP.
the class TestNextIndex method indexExample3.
/**
 * Indexes the text "abcabcabac" with one {@code <x>} element and checks the
 * bracketed snippet of the first match for "element x directly followed by
 * term s:b".
 *
 * @throws IOException declared by the test signature; propagated from the
 *         index calls used here.
 */
@Test
public void indexExample3() throws IOException {
    // abcabcabac
    FieldDocument doc = new FieldDocument();
    doc.addTV("base", "abcabcabac",
            "[(0-1)s:a|i:a|_0$<i>0<i>1|-:t$<i>10]"
                    + "[(1-2)s:b|i:b|_1$<i>1<i>2]"
                    + "[(2-3)s:c|i:c|_2$<i>2<i>3]"
                    + "[(3-4)s:a|i:a|_3$<i>3<i>4|<>:x$<b>64<i>3<i>7<i>7<b>0]"
                    + "[(4-5)s:b|i:b|_4$<i>4<i>5]"
                    + "[(5-6)s:c|i:c|_5$<i>5<i>6]"
                    + "[(6-7)s:a|i:a|_6$<i>6<i>7]"
                    + "[(7-8)s:b|i:b|_7$<i>7<i>8]"
                    + "[(8-9)s:a|i:a|_8$<i>8<i>9]"
                    + "[(9-10)s:c|i:c|_9$<i>9<i>10]");

    KrillIndex index = new KrillIndex();
    index.addDoc(doc);
    index.commit();

    // <x> element immediately followed by the surface term "b"
    SpanElementQuery xElement = new SpanElementQuery("base", "x");
    SpanTermQuery bTerm = new SpanTermQuery(new Term("base", "s:b"));
    SpanQuery query = new SpanNextQuery(xElement, bTerm);

    Result result = index.search(query, (short) 10);
    assertEquals("abc[[abcab]]ac", result.getMatch(0).getSnippetBrackets());
}
Aggregations