Use of org.apache.lucene.search.WildcardQuery in project rubia-forums by flashboss.
The class ForumsSearchModuleImpl, method findTopics.
@SuppressWarnings("unchecked")
public ResultPage<Topic> findTopics(SearchCriteria criteria) throws ModuleException {
if (criteria != null) {
try {
EntityManager session = getSession();
FullTextSession fullTextSession = getFullTextSession((Session) session.getDelegate());
Builder builder = new Builder();
String keywords = criteria.getKeywords();
if (keywords != null && keywords.length() != 0) {
String[] fields = null;
Searching searching = Searching.valueOf(criteria.getSearching());
switch(searching) {
case TITLE_MSG:
fields = new String[] { "message.text", "topic.subject" };
break;
case MSG:
fields = new String[] { "message.text" };
break;
}
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
builder.add(parser.parse(keywords), MUST);
}
String forumId = criteria.getForum();
if (forumId != null && forumId.length() != 0) {
builder.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
}
String categoryId = criteria.getCategory();
if (categoryId != null && categoryId.length() != 0) {
builder.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
}
String userName = criteria.getAuthor();
if (userName != null && userName.length() != 0) {
builder.add(new WildcardQuery(new Term("poster.userId", userName)), MUST);
}
String timePeriod = criteria.getTimePeriod();
if (timePeriod != null && timePeriod.length() != 0) {
addPostTimeQuery(builder, TimePeriod.valueOf(timePeriod));
}
FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
SortBy sortBy = valueOf(criteria.getSortBy());
fullTextQuery.setSort(getSort(sortBy, sortOrder));
fullTextQuery.setProjection("topic.id");
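// the hits are posts projected to topic ids: collect distinct ids in hit order,
// so the page boundaries below apply to topics rather than posts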
LinkedHashSet<Integer> topicIds = new LinkedHashSet<Integer>();
LinkedHashSet<Integer> topicToDispIds = new LinkedHashSet<Integer>();
int start = criteria.getPageSize() * criteria.getPageNumber();
int end = start + criteria.getPageSize();
int index = 0;
for (Object o : fullTextQuery.list()) {
Integer id = (Integer) ((Object[]) o)[0];
if (topicIds.add(id)) {
if (index >= start && index < end) {
topicToDispIds.add(id);
}
index++;
}
}
List<Topic> topics = null;
if (topicToDispIds.size() > 0) {
Query q = session.createQuery("from Topic as t join fetch t.poster where t.id IN ( :topicIds )");
q.setParameter("topicIds", topicToDispIds);
List<Topic> results = q.getResultList();
topics = new LinkedList<Topic>();
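// the IN query returns topics in arbitrary order; restore the relevance
// order given by topicToDispIds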
for (Integer id : topicToDispIds) {
for (Topic topic : results) {
if (id.equals(topic.getId())) {
topics.add(topic);
break;
}
}
}
}
ResultPage<Topic> resultPage = new ResultPage<Topic>();
resultPage.setPage(topics);
resultPage.setResultSize(topicIds.size());
return resultPage;
} catch (ParseException e) {
return null;
} catch (Exception e) {
throw new ModuleException(e.getMessage(), e);
}
} else {
throw new IllegalArgumentException("criteria cannot be null");
}
}
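Because the author criterion is passed verbatim into a WildcardQuery, a user can narrow by author with '*' and '?' patterns. Below is a minimal, self-contained sketch of that behaviour; the class name and sample user ids are hypothetical, and it assumes the Lucene 5.x-era API these snippets use (RAMDirectory, version-free IndexWriterConfig).

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.RAMDirectory;

public class WildcardAuthorSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        for (String user : new String[] { "john", "johanna", "mary" }) {
            Document doc = new Document();
            // StringField indexes the user id as one untokenized term,
            // which is the form WildcardQuery matches against
            doc.add(new StringField("poster.userId", user, Field.Store.YES));
            w.addDocument(doc);
        }
        w.close();
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        // "joh*" matches john and johanna; a TermQuery for the literal "joh*" would match nothing
        WildcardQuery q = new WildcardQuery(new Term("poster.userId", "joh*"));
        System.out.println(searcher.search(q, 10).totalHits); // 2
    }
}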
Use of org.apache.lucene.search.WildcardQuery in project rubia-forums by flashboss.
The class ForumsSearchModuleImpl, method findPosts.
@SuppressWarnings("unchecked")
public ResultPage<Post> findPosts(SearchCriteria criteria) throws ModuleException {
if (criteria != null) {
try {
EntityManager session = getSession();
FullTextSession fullTextSession = getFullTextSession((Session) session.getDelegate());
Builder builder = new Builder();
String keywords = criteria.getKeywords();
if (keywords != null && keywords.length() != 0) {
String[] fields = null;
Searching searching = Searching.valueOf(criteria.getSearching());
switch(searching) {
case TITLE_MSG:
fields = new String[] { "message.text", "topic.subject" };
break;
case MSG:
fields = new String[] { "message.text" };
break;
}
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
builder.add(parser.parse(keywords), MUST);
}
String forumId = criteria.getForum();
if (forumId != null && forumId.length() != 0) {
builder.add(new TermQuery(new Term("topic.forum.id", forumId)), MUST);
}
String categoryId = criteria.getCategory();
if (categoryId != null && categoryId.length() != 0) {
builder.add(new TermQuery(new Term("topic.forum.category.id", categoryId)), MUST);
}
String userName = criteria.getAuthor();
if (userName != null && userName.length() != 0) {
builder.add(new WildcardQuery(new Term("poster.userId", userName)), MUST);
}
String timePeriod = criteria.getTimePeriod();
if (timePeriod != null && timePeriod.length() != 0) {
addPostTimeQuery(builder, TimePeriod.valueOf(timePeriod));
}
FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(builder.build(), Post.class);
SortOrder sortOrder = SortOrder.valueOf(criteria.getSortOrder());
String sortByStr = criteria.getSortBy();
SortBy sortBy = null;
if (sortByStr != null)
sortBy = valueOf(sortByStr);
fullTextQuery.setSort(getSort(sortBy, sortOrder));
fullTextQuery.setFirstResult(criteria.getPageSize() * criteria.getPageNumber());
fullTextQuery.setMaxResults(criteria.getPageSize());
ResultPage<Post> resultPage = new ResultPage<Post>();
resultPage.setPage(fullTextQuery.list());
resultPage.setResultSize(fullTextQuery.getResultSize());
return resultPage;
} catch (ParseException e) {
return null;
} catch (Exception e) {
throw new ModuleException(e.getMessage(), e);
}
} else {
throw new IllegalArgumentException("criteria cannot be null");
}
}
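Note the contrast with findTopics: findPosts can page inside Lucene with setFirstResult/setMaxResults because every hit is one result row, whereas several post hits may collapse into a single topic. A standalone sketch of that deduplicating pagination, with a hypothetical class name and sample data:

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

public class TopicPagingSketch {
    static LinkedHashSet<Integer> pageOfTopics(List<Integer> topicIdPerHit, int pageSize, int pageNumber) {
        LinkedHashSet<Integer> seen = new LinkedHashSet<>();
        LinkedHashSet<Integer> page = new LinkedHashSet<>();
        int start = pageSize * pageNumber;
        int end = start + pageSize;
        int index = 0;
        for (Integer id : topicIdPerHit) {
            if (seen.add(id)) { // count each topic once, in hit order
                if (index >= start && index < end) {
                    page.add(id);
                }
                index++;
            }
        }
        return page;
    }

    public static void main(String[] args) {
        // each hit projects to a topic id; topic 7 has three matching posts
        List<Integer> hits = Arrays.asList(7, 7, 3, 9, 7, 4);
        System.out.println(pageOfTopics(hits, 2, 0)); // [7, 3]
        System.out.println(pageOfTopics(hits, 2, 1)); // [9, 4]
    }
}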
Use of org.apache.lucene.search.WildcardQuery in project jackrabbit-oak by apache.
The class LuceneIndex, method addNonFullTextConstraints.
private static void addNonFullTextConstraints(List<Query> qs, Filter filter, IndexReader reader, Analyzer analyzer, IndexDefinition indexDefinition) {
if (!filter.matchesAllTypes()) {
addNodeTypeConstraints(qs, filter);
}
String path = filter.getPath();
switch(filter.getPathRestriction()) {
case ALL_CHILDREN:
if (USE_PATH_RESTRICTION) {
if ("/".equals(path)) {
break;
}
if (!path.endsWith("/")) {
path += "/";
}
qs.add(new PrefixQuery(newPathTerm(path)));
}
break;
case DIRECT_CHILDREN:
if (USE_PATH_RESTRICTION) {
if (!path.endsWith("/")) {
path += "/";
}
qs.add(new PrefixQuery(newPathTerm(path)));
}
break;
case EXACT:
qs.add(new TermQuery(newPathTerm(path)));
break;
case PARENT:
if (denotesRoot(path)) {
// there's no parent of the root node
// we add a path that can not possibly occur because there
// is no way to say "match no documents" in Lucene
qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
} else {
qs.add(new TermQuery(newPathTerm(getParentPath(path))));
}
break;
case NO_RESTRICTION:
break;
}
// Fulltext index definition used by LuceneIndex only works with old format
// which is not nodeType based. So just use the nt:base index
IndexingRule rule = indexDefinition.getApplicableIndexingRule(JcrConstants.NT_BASE);
for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
if (pr.first == null && pr.last == null) {
// skip property-existence ("is not null") restrictions; this index cannot answer such
// queries (OAK-1208)
continue;
}
// check excluded properties and types
if (isExcludedProperty(pr, rule)) {
continue;
}
String name = pr.propertyName;
if (QueryConstants.REP_EXCERPT.equals(name) || QueryConstants.OAK_SCORE_EXPLANATION.equals(name) || QueryConstants.REP_FACET.equals(name)) {
continue;
}
if (JCR_PRIMARYTYPE.equals(name)) {
continue;
}
if (QueryConstants.RESTRICTION_LOCAL_NAME.equals(name)) {
continue;
}
if (skipTokenization(name)) {
qs.add(new TermQuery(new Term(name, pr.first.getValue(STRING))));
continue;
}
String first = null;
String last = null;
boolean isLike = pr.isLike;
// TODO what to do with escaped tokens?
if (pr.first != null) {
first = pr.first.getValue(STRING);
first = first.replace("\\", "");
}
if (pr.last != null) {
last = pr.last.getValue(STRING);
last = last.replace("\\", "");
}
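// translate the JCR LIKE pattern into a Lucene wildcard: '%' -> '*', '_' -> '?'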
if (isLike) {
first = first.replace('%', WildcardQuery.WILDCARD_STRING);
first = first.replace('_', WildcardQuery.WILDCARD_CHAR);
int indexOfWS = first.indexOf(WildcardQuery.WILDCARD_STRING);
int indexOfWC = first.indexOf(WildcardQuery.WILDCARD_CHAR);
int len = first.length();
if (indexOfWS == len - 1 || indexOfWC == len - 1) {
// remove trailing "*" for prefixquery
first = first.substring(0, first.length() - 1);
if (JCR_PATH.equals(name)) {
qs.add(new PrefixQuery(newPathTerm(first)));
} else {
qs.add(new PrefixQuery(new Term(name, first)));
}
} else {
if (JCR_PATH.equals(name)) {
qs.add(new WildcardQuery(newPathTerm(first)));
} else {
qs.add(new WildcardQuery(new Term(name, first)));
}
}
continue;
}
if (first != null && first.equals(last) && pr.firstIncluding && pr.lastIncluding) {
if (JCR_PATH.equals(name)) {
qs.add(new TermQuery(newPathTerm(first)));
} else {
if ("*".equals(name)) {
addReferenceConstraint(first, qs, reader);
} else {
for (String t : tokenize(first, analyzer)) {
qs.add(new TermQuery(new Term(name, t)));
}
}
}
continue;
}
first = tokenizeAndPoll(first, analyzer);
last = tokenizeAndPoll(last, analyzer);
qs.add(TermRangeQuery.newStringRange(name, first, last, pr.firstIncluding, pr.lastIncluding));
}
}
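The LIKE branch in isolation: a minimal sketch of the translation and of the prefix-query shortcut taken when the only wildcard is a trailing '*'. The helper below is illustrative, not part of Oak.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;

public class LikeToLucene {
    static Query likeToQuery(String field, String like) {
        // JCR LIKE: '%' is any sequence, '_' is any single character
        String pattern = like.replace('%', '*').replace('_', '?');
        int last = pattern.length() - 1;
        // sole wildcard is a trailing '*': PrefixQuery is much cheaper than
        // a general WildcardQuery, which must scan candidate terms
        if (pattern.indexOf('*') == last && pattern.indexOf('?') < 0) {
            return new PrefixQuery(new Term(field, pattern.substring(0, last)));
        }
        return new WildcardQuery(new Term(field, pattern));
    }

    public static void main(String[] args) {
        System.out.println(likeToQuery("name", "foo%")); // name:foo* (PrefixQuery)
        System.out.println(likeToQuery("name", "f_o%")); // name:f?o* (WildcardQuery)
    }
}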
Use of org.apache.lucene.search.WildcardQuery in project Krill by KorAP.
The class TestIndex, method indexLucene.
@Test
public void indexLucene() throws Exception {
// Base analyzer for searching and indexing
StandardAnalyzer analyzer = new StandardAnalyzer();
// Based on http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/analysis/Analyzer.html?is-external=true
// Create configuration with base analyzer
IndexWriterConfig config = new IndexWriterConfig(analyzer);
// Add documents with the correct fields
IndexWriter w = new IndexWriter(index, config);
Collection docs = initIndexer();
@SuppressWarnings("unchecked") Iterator<Map<String, String>> i = (Iterator<Map<String, String>>) docs.iterator();
while (i.hasNext()) {
addDoc(w, i.next());
}
assertEquals(3, w.numDocs());
w.close();
// Check directory
DirectoryReader reader = DirectoryReader.open(index);
assertEquals(docs.size(), reader.maxDoc());
assertEquals(docs.size(), reader.numDocs());
// Check searcher
IndexSearcher searcher = new IndexSearcher(reader);
// textClass
// All texts of text class "news"
assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "news")), 10).totalHits);
// textClass
// All texts of text class "sports"
assertEquals(2, searcher.search(new TermQuery(new Term("textClass", "sports")), 10).totalHits);
// TextIndex
// All docs containing "l:nehmen"
assertEquals(1, searcher.search(new TermQuery(new Term("text", "l:nehmen")), 10).totalHits);
// TextIndex
// All docs containing "s:den"
assertEquals(2, searcher.search(new TermQuery(new Term("text", "s:den")), 10).totalHits);
/*
assertEquals(3,
searcher.search(
new TermQuery(
new Term("text", "T")
), 10
).totalHits
);
*/
// BooleanQuery
// All docs containing "s:den" and "l:sie"
TermQuery s_den = new TermQuery(new Term("text", "s:den"));
TermQuery l_sie = new TermQuery(new Term("text", "l:sie"));
BooleanQuery bool = new BooleanQuery();
bool.add(s_den, BooleanClause.Occur.MUST);
bool.add(l_sie, BooleanClause.Occur.MUST);
assertEquals(1, searcher.search(bool, 10).totalHits);
// BooleanQuery
// All docs containing "s:den" or "l:sie"
bool = new BooleanQuery();
bool.add(s_den, BooleanClause.Occur.SHOULD);
bool.add(l_sie, BooleanClause.Occur.SHOULD);
assertEquals(2, searcher.search(bool, 10).totalHits);
// RegexpQuery
// All docs containing ".{4}en" (liefen and Hunden)
RegexpQuery srquery = new RegexpQuery(new Term("text", "s:.{4}en"));
assertEquals(2, searcher.search(srquery, 10).totalHits);
// RegexpQuery
// All docs containing "E." (Er) (2x)
srquery = new RegexpQuery(new Term("text", "s:E."));
assertEquals(2, searcher.search(srquery, 10).totalHits);
SpanRegexQueryWrapper ssrquery = new SpanRegexQueryWrapper("text", "s:E.");
assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
// RegexpQuery
// All docs containing "E." (er) (0x)
srquery = new RegexpQuery(new Term("text", "s:e."));
assertEquals(0, searcher.search(srquery, 10).totalHits);
ssrquery = new SpanRegexQueryWrapper("text", "s:e.");
assertEquals(0, searcher.search(ssrquery.toQuery(), 10).totalHits);
// RegexpQuery
// All docs containing "E."/i ([Ee]r) (2x)
srquery = new RegexpQuery(new Term("text", "i:e."));
assertEquals(2, searcher.search(srquery, 10).totalHits);
ssrquery = new SpanRegexQueryWrapper("text", "s:e.", true);
assertEquals("SpanMultiTermQueryWrapper(text:/i:e./)", ssrquery.toQuery().toString());
assertEquals(2, searcher.search(ssrquery.toQuery(), 10).totalHits);
// All docs containing "ng"/x (Angst) (2x)
srquery = new RegexpQuery(new Term("text", "s:.*ng.*"));
assertEquals(2, searcher.search(srquery, 10).totalHits);
// Check http://comments.gmane.org/gmane.comp.jakarta.lucene.user/52283
// for Carsten's question on wildcards
// WildcardQuery
// All docs containing "ng" (Angst) (2x)
WildcardQuery swquery = new WildcardQuery(new Term("text", "s:*ng*"));
assertEquals("text:s:*ng*", swquery.toString());
assertEquals(2, searcher.search(swquery, 10).totalHits);
// [base=angst]
SpanTermQuery stq = new SpanTermQuery(new Term("text", "l:angst"));
assertEquals(2, searcher.search(stq, 10).totalHits);
// vor Angst
// [orth=vor][orth=Angst]
SpanNearQuery snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "s:vor")), new SpanTermQuery(new Term("text", "s:Angst")) }, 1, true);
assertEquals(1, searcher.search(snquery, 10).totalHits);
// Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() },
5, // slop
true); // inOrder
assertEquals(1, searcher.search(snquery, 10).totalHits);
// Spannearquery [p:VVFIN][m:acc:sg:masc]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:c:acc")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:g:masc")) }, -1, false) },
-1, // slop
false)
// alternative: new SpanTermQuery(new Term("text", "m:-acc:--sg:masc"))
},
0, // slop
true); // inOrder
assertEquals(1, searcher.search(snquery, 10).totalHits);
// Spannearquery [p:VVFIN|m:3:sg:past:ind]
// Exact match!
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:p:3")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:n:sg")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) }, -1, false) }, -1, false) }, -1, false) },
-1, // slop
false); // inOrder
assertEquals(2, searcher.search(snquery, 10).totalHits);
// To make sure, this is not equal:
// Spannearquery [p:VVFIN & m:3:sg:past:ind]
// Exact match!
// Maybe it IS equal
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanTermQuery(new Term("text", "m:p:3")), new SpanTermQuery(new Term("text", "m:n:sg")), new SpanTermQuery(new Term("text", "m:t:past")), new SpanTermQuery(new Term("text", "m:m:ind")) },
-1, // slop
false); // inOrder
assertNotEquals(2, searcher.search(snquery, 10).totalHits);
// assertEquals(2, searcher.search(snquery, 10).totalHits);
// Spannearquery [p:VVFIN & m:3:sg & past:ind]
SpanSegmentQueryWrapper sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:n:sg", "m:t:past", "m:m:ind");
assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
// Todo:
/*
sniquery = new SpanSegmentQuery(
"text",
"p:VVFIN",
"m:p:3",
"m:n:sg",
"m:t:past",
"m:m:ind"
);
*/
// Spannearquery [p:VVFIN][]{,5}[m:nom:sg:fem]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:VVFIN")), new SpanSegmentQueryWrapper("text", "m:c:nom", "m:n:sg", "m:g:fem").toQuery() },
5, // slop
true); // inOrder
assertEquals(1, searcher.search(snquery, 10).totalHits);
sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past", "m:m:ind", "m:n:sg");
assertEquals(2, searcher.search(sniquery.toQuery(), 10).totalHits);
// [p = VVFIN & m:p = 3 & m:t = past & m:n != pl] or
// [p = VVFIN & m:p = 3 & m:t = past & !m:n = pl]
// TODO: Problem: What should happen in case the category does not exist?
// possible solution: & ( m:n != pl & exists(m:n))
sniquery = new SpanSegmentQueryWrapper("text", "p:VVFIN", "m:p:3", "m:t:past");
SpanQuery snqquery = new SpanNotQuery(sniquery.toQuery(), new SpanTermQuery(new Term("text", "m:n:pl")));
assertEquals(2, searcher.search(snqquery, 10).totalHits);
// [p = NN & (m:c = nom | m:c = acc)]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))) }, -1, false);
assertEquals(2, searcher.search(snqquery, 10).totalHits);
// [p = NN & !(m:c = nom | m:c = acc)]
snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanOrQuery(new SpanTermQuery(new Term("text", "m:c:nom")), new SpanTermQuery(new Term("text", "m:c:acc"))));
assertEquals(1, searcher.search(snqquery, 10).totalHits);
// [p = NN & !(m:c = nom)]
snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:nom")));
assertEquals(3, searcher.search(snqquery, 10).totalHits);
// [p=NN & !(m:c = acc)]
snqquery = new SpanNotQuery(new SpanTermQuery(new Term("text", "p:NN")), new SpanTermQuery(new Term("text", "m:c:acc")));
assertEquals(2, searcher.search(snqquery, 10).totalHits);
// [p=PPER][][p=ART]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "p:PPER")), new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "p:ART")) }, 0, true) }, 0, true);
assertEquals(1, searcher.search(snquery, 10).totalHits);
// Todo:
// [orth=się][]{2,4}[base=bać]
// [orth=się][orth!="[.!?,:]"]{,5}[base=bać]|[base=bać][base="on|ja|ty|my|wy"]?[orth=się]
// [pos=subst & orth="a.*"]{2}
// [tag=subst:sg:nom:n]
// [case==acc & case==gen] ??
// [case~acc & case~gen]
// [case~~acc]
// [base=bać][orth!=się]+[orth=się] within s
// [][][p:VAFIN] within s
// [][p:VAFIN] within s
// [][][p:VAFIN]
snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true);
assertEquals(1, searcher.search(snquery, 10).totalHits);
/*
http://stackoverflow.com/questions/1311199/finding-the-position-of-search-hits-from-lucene
*/
StringBuilder payloadString = new StringBuilder();
Map<Term, TermContext> termContexts = new HashMap<>();
for (LeafReaderContext atomic : reader.leaves()) {
Bits bitset = atomic.reader().getLiveDocs();
// Spans spans = NearSpansOrdered();
Spans spans = snquery.getSpans(atomic, bitset, termContexts);
while (spans.next()) {
int docid = atomic.docBase + spans.doc();
if (spans.isPayloadAvailable()) {
for (byte[] payload : spans.getPayload()) {
/* retrieve payload for current matching span */
payloadString.append(new String(payload));
payloadString.append(" | ");
}
}
}
}
// assertEquals(33, payloadString.length());
assertEquals(0, payloadString.length());
// [][][p:VAFIN]
// without collecting payloads
snquery = new SpanNearQuery(new SpanQuery[] { new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term("text", "T")), new SpanTermQuery(new Term("text", "T")) }, 0, true, false), new SpanTermQuery(new Term("text", "p:VAFIN")) }, 0, true, false);
assertEquals(1, searcher.search(snquery, 10).totalHits);
payloadString = new StringBuilder();
termContexts = new HashMap<>();
for (LeafReaderContext atomic : reader.leaves()) {
Bits bitset = atomic.reader().getLiveDocs();
// Spans spans = NearSpansOrdered();
Spans spans = snquery.getSpans(atomic, bitset, termContexts);
while (spans.next()) {
int docid = atomic.docBase + spans.doc();
for (byte[] payload : spans.getPayload()) {
/* retrieve payload for current matching span */
payloadString.append(new String(payload));
payloadString.append(" | ");
}
}
}
assertEquals(0, payloadString.length());
// [][][p:VAFIN] in s
// ([e:s:<][]*[T] | [T & e:s:<]) [T] ([p:VAFIN & e:s:>] | [T][]*[e:s:>]
/*
SpanSegmentWithinQuery ssequery = new SpanSegmentWithinQuery(
"text","s", new SpanSegmentSequenceQuery("text", "T", "T", "p:VAFIN")
);
assertEquals(0, searcher.search(ssequery.toQuery(), 10).totalHits);
payloadString = new StringBuilder();
termContexts = new HashMap<>();
for (LeafReaderContext atomic : reader.leaves()) {
Bits bitset = atomic.reader().getLiveDocs();
// Spans spans = NearSpansOrdered();
Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
while (spans.next()) {
int docid = atomic.docBase + spans.doc();
for (byte[] payload : spans.getPayload()) {
/// retrieve payload for current matching span
payloadString.append(new String(payload));
payloadString.append(" | ");
};
};
};
assertEquals(0, payloadString.length(), 1);
ssequery = new SpanSegmentWithinQuery(
"text","s", new SpanSegmentSequenceQuery("text", "T", "p:VAFIN")
);
assertEquals("for " + ssequery.toQuery(),
1, searcher.search(ssequery.toQuery(), 10).totalHits);
payloadString = new StringBuilder();
termContexts = new HashMap<>();
for (LeafReaderContext atomic : reader.leaves()) {
Bits bitset = atomic.reader().getLiveDocs();
// Spans spans = NearSpansOrdered();
Spans spans = ssequery.toQuery().getSpans(atomic, bitset, termContexts);
while (spans.next()) {
int docid = atomic.docBase + spans.doc();
for (byte[] payload : spans.getPayload()) {
// retrieve payload for current matching span
payloadString.append(new String(payload));
payloadString.append(" | ");
};
fail("Doc: " + docid + " with " + spans.start() + "-" + spans.end() + " || " + payloadString.toString());
};
};
assertEquals(20, payloadString.length());
*/
// Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), ssequery.toQuery());
/*
TopDocs topDocs = is.search(snq, 1);
Set<String> payloadSet = new HashSet<String>();
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload, "UTF-8"));
}
}
}
*/
/*
Alternatively:
IndexReader reader = writer.getReader();
writer.close();
IndexSearcher searcher = newSearcher(reader);
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr")));
if(VERBOSE)
System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) {
if(VERBOSE)
System.out.println(new String(bytes, "UTF-8"));
}
*/
/* new: */
// PayloadHelper helper = new PayloadHelper();
// Map<Term, TermContext> termContexts = new HashMap<>();
// Spans spans;
// spans = snquery.getSpans(searcher.getIndexReader());
// searcher = helper.setUp(similarity, 1000);
/*
IndexReader reader = search.getReader(querycontainer.getFoundry());
Spans luceneSpans;
Bits bitset = atomic.reader().getLiveDocs();
for (byte[] payload : luceneSpans.getPayload())
/* Iterate over all matching documents */
/*
while (luceneSpans.next() && total < config.getMaxhits()) {
Span matchSpan;
StringBuilder payloadString = new StringBuilder();
int docid = atomic.docBase + luceneSpans.doc();
String docname = search.retrieveDocname(docid,
querycontainer.getFoundry());
total++;
for (byte[] payload : luceneSpans.getPayload())
*/
/* retrieve payload for current matching span */
// payloadString.append(new String(payload));
/* create span containing result */
/*
matchSpan = new Span(docname);
matchSpan.setIndexdocid(docid);
matchSpan.setLayer(querycontainer.getLayer());
matchSpan.storePayloads(payloadString.toString());
matchSpans.add(matchSpan);
*/
/*
* topdocs = searcher.search(new ConstantScoreQuery(corpusQ add
* position to list of positions to be considered for later
* searches
*/
/*
validValues.put(docname,
matchSpan.getPayload(config.getPrefix()));
}
*/
// Todo: API made by add(), typed for queries and strings
// SpanPayloadCheckQuery for sentences!
/* Support regular expression in SpanSegmentQuery */
// new Regexp();
// new Term();
/*
Maybe: spanSegmentQuery(new Term(), new Wildcard(), new Regex());
*/
// And Not ->
// SpanTermDiffQuery
/*
SpanNearQuery poquery = new SpanNearQuery(
);
*/
reader.close();
}
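The test composes a regular expression into span queries via Krill's SpanRegexQueryWrapper; stock Lucene offers the same composition for a WildcardQuery through SpanMultiTermQueryWrapper. A minimal sketch under the same assumptions as above (Lucene 5.x-era API; class name and sample text hypothetical):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.RAMDirectory;

public class SpanWildcardSketch {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
        Document d = new Document();
        d.add(new TextField("text", "vor Angst zittern", Field.Store.NO));
        w.addDocument(d);
        w.close();
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        // wrap the multi-term WildcardQuery so it can take part in span composition,
        // analogous to the SpanRegexQueryWrapper used in the test above
        SpanQuery angst = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("text", "*ngst")));
        SpanNearQuery near = new SpanNearQuery(
                new SpanQuery[] { new SpanTermQuery(new Term("text", "vor")), angst }, 0, true);
        System.out.println(searcher.search(near, 10).totalHits); // 1
    }
}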