use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.
the class AbstractIndexStoreTest method termEnum.
/**
 * Indexes two contacts and verifies, field by field, the sorted terms returned by {@code getTermsForField}.
 * The result of getTermsForField can be good for seeing the effects of {@code ZimbraAnalyzer} on how fields get
 * tokenized. TODO: Add tests for different types of tokenizers.
 *
 * @throws Exception if creating the contacts, indexing them or reading the index fails
 */
@Test
public void termEnum() throws Exception {
    ZimbraLog.test.debug("--->TEST termEnum");
    Mailbox mbox = MailboxManager.getInstance().getMailboxByAccountId(MockProvisioning.DEFAULT_ACCOUNT_ID);
    // Mixed-case address: the expected terms below are all lower case.
    createContact(mbox, "teSt1@ziMBRA.com");
    createContact(mbox, "test2@zimbra.com");
    // Make sure all indexing has been done
    mbox.index.indexDeferredItems();
    IndexStore index = mbox.index.getIndexStore();
    ZimbraIndexSearcher searcher = index.openSearcher();
    try {
        // Note that TermFieldEnumeration order is defined to be sorted
        TermFieldEnumeration fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTACT_DATA, "");
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "@zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_CONTACT_DATA);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            // l.content values:
            // "test1@zimbra.com test1 @zimbra.com zimbra.com zimbra @zimbra "
            // "test2@zimbra.com test2 @zimbra.com zimbra.com zimbra @zimbra "
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTENT, "");
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_CONTENT);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_FIELD, "");
            checkNextTerm(fields, new Term(LuceneFields.L_FIELD, "email:test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_FIELD, "email:test2@zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_FIELD);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_PARTNAME, "");
            checkNextTerm(fields, new Term(LuceneFields.L_PARTNAME, "CONTACT"));
            checkAtEnd(fields, LuceneFields.L_PARTNAME);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_H_TO, "");
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "@zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_H_TO);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            // "tess" is not an indexed term; the enumeration is expected to start at the first term that
            // sorts after it, so the two "@zimbra..." terms are skipped.
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_H_TO, "tess");
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_H_TO + "(sublist)");
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_SORT_DATE, "");
            checkNextTermFieldType(fields, LuceneFields.L_SORT_DATE);
            // TODO: ElasticSearch has more. Not sure why and not sure it matters
            // checkAtEnd(fields, LuceneFields.L_SORT_DATE);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_MAILBOX_BLOB_ID, "");
            checkNextTermFieldType(fields, LuceneFields.L_MAILBOX_BLOB_ID);
            checkNextTermFieldType(fields, LuceneFields.L_MAILBOX_BLOB_ID);
            // TODO: ElasticSearch has more. Investigate? Believe it relates to fact that is a number field
            // Numbers have an associated precision step (number of terms generated for each number value)
            // which defaults to 4.
            // checkAtEnd(fields, LuceneFields.L_MAILBOX_BLOB_ID);
        } finally {
            Closeables.closeQuietly(fields);
        }
    } finally {
        // Release the searcher even when one of the checks above fails or throws.
        Closeables.closeQuietly(searcher);
    }
}
use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.
the class MailboxIndex method getAttachmentTypes.
/**
 * Returns all attachment types from the index.
 *
 * @param regex matching pattern; null or an empty string matches everything
 * @return {@link BrowseTerm}s which correspond to all of the attachment types in the index
 * @throws IOException if opening or reading the index fails
 * @throws ServiceException propagated from the underlying index or regex-matching calls
 */
public List<BrowseTerm> getAttachmentTypes(String regex) throws IOException, ServiceException {
    Pattern pattern = Strings.isNullOrEmpty(regex) ? null : Pattern.compile(regex);
    List<BrowseTerm> result = new ArrayList<BrowseTerm>();
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    TermFieldEnumeration values = null;
    try {
        values = searcher.getIndexReader().getTermsForField(LuceneFields.L_ATTACHMENTS, "");
        while (values.hasMoreElements()) {
            BrowseTerm term = values.nextElement();
            // Other consumers of this enumeration treat a null element as the end of the terms;
            // without this guard a null would be added to the result or dereferenced below.
            if (term == null) {
                break;
            }
            if (pattern == null || AccessBoundedRegex.matches(term.getText(), pattern, MAX_REGEX_ACCESSES)) {
                result.add(term);
            }
        }
    } finally {
        // closeQuietly tolerates null, so this is safe even if getTermsForField threw.
        Closeables.closeQuietly(values);
        Closeables.closeQuietly(searcher);
    }
    return result;
}
use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.
the class MailboxIndex method getDomains.
/**
 * Collects the domain names found among the terms of one index field.
 *
 * <p>Only terms that start with {@code '@'} and contain a {@code '.'} are treated as domains. The leading
 * {@code '@'} is stripped from the text of each returned term.
 *
 * @param field Lucene field name (e.g. LuceneFields.L_H_CC)
 * @param regex matching pattern, or null/empty to match everything; an {@code '@'} is prepended when the
 *              pattern does not already start with one
 * @return {@link BrowseTerm}s which correspond to all of the domain terms stored in a given field
 */
public List<BrowseTerm> getDomains(String field, String regex) throws IOException, ServiceException {
    Pattern domainPattern = null;
    if (!Strings.isNullOrEmpty(regex)) {
        String anchored = regex.startsWith("@") ? regex : "@" + regex;
        domainPattern = Pattern.compile(anchored);
    }
    List<BrowseTerm> domains = new ArrayList<BrowseTerm>();
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    TermFieldEnumeration terms = null;
    try {
        terms = searcher.getIndexReader().getTermsForField(field, "");
        while (terms.hasMoreElements()) {
            BrowseTerm current = terms.nextElement();
            if (current == null) {
                break;
            }
            String token = current.getText();
            // Domain tokens carry an '@' prefix; tokens without a '.' are only partial domains.
            boolean fullDomain = token.startsWith("@") && token.contains(".");
            if (!fullDomain) {
                continue;
            }
            if (domainPattern != null && !AccessBoundedRegex.matches(token, domainPattern, MAX_REGEX_ACCESSES)) {
                continue;
            }
            domains.add(new BrowseTerm(token.substring(1), current.getFreq()));
        }
    } finally {
        Closeables.closeQuietly(terms);
        Closeables.closeQuietly(searcher);
    }
    return domains;
}
use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.
the class LuceneQueryOperation method expandLazyMultiPhraseQuery.
/**
 * Replaces every {@code LazyMultiPhraseQuery} reachable from {@code query} with a {@code MultiPhraseQuery}
 * whose terms flagged for expansion are substituted by the index terms that start with them.
 *
 * <p>At most {@code LC.zimbra_index_wildcard_max_terms_expanded} terms are collected per expanded position.
 * Boolean queries are processed recursively and modified in place; any other query is returned untouched.
 *
 * @param query the query to process
 * @return the resulting query, or null when an expansion produced no terms, a required boolean clause could
 *         not be expanded, or a boolean query ended up with no clauses
 * @throws IOException declared for the index term lookups
 */
private Query expandLazyMultiPhraseQuery(Query query) throws IOException {
    if (query instanceof LazyMultiPhraseQuery) {
        LazyMultiPhraseQuery lazyQuery = (LazyMultiPhraseQuery) query;
        int limit = LC.zimbra_index_wildcard_max_terms_expanded.intValue();
        MultiPhraseQuery expandedQuery = new MultiPhraseQuery();
        for (Term[] position : lazyQuery.getTermArrays()) {
            // Only single-term positions that were flagged for expansion are looked up in the index.
            boolean needsExpansion = position.length == 1 && lazyQuery.expand.contains(position[0]);
            if (!needsExpansion) {
                expandedQuery.add(position);
                continue;
            }
            Term prefix = position[0];
            List<Term> matches = Lists.newArrayList();
            TermFieldEnumeration candidates =
                    searcher.getIndexReader().getTermsForField(prefix.field(), prefix.text());
            try {
                while (candidates.hasMoreElements()) {
                    BrowseTerm candidate = candidates.nextElement();
                    // Stop at the first missing term or the first term that no longer has the prefix.
                    if (candidate == null || !candidate.getText().startsWith(prefix.text())) {
                        break;
                    }
                    if (matches.size() >= limit) {
                        // too many terms expanded
                        break;
                    }
                    matches.add(new Term(prefix.field(), candidate.getText()));
                }
            } finally {
                Closeables.closeQuietly(candidates);
            }
            if (matches.isEmpty()) {
                return null;
            }
            expandedQuery.add(matches.toArray(new Term[matches.size()]));
        }
        return expandedQuery;
    }

    if (query instanceof BooleanQuery) {
        BooleanQuery booleanQuery = (BooleanQuery) query;
        ListIterator<BooleanClause> clauseItr = booleanQuery.clauses().listIterator();
        while (clauseItr.hasNext()) {
            BooleanClause clause = clauseItr.next();
            Query before = clause.getQuery();
            Query after = expandLazyMultiPhraseQuery(before);
            if (after == null) {
                if (clause.isRequired()) {
                    // A required clause that cannot be expanded invalidates the whole boolean query.
                    return null;
                }
                clauseItr.remove();
            } else if (after != before) {
                clause.setQuery(after);
            }
        }
        if (booleanQuery.clauses().isEmpty()) {
            return null;
        }
        return query;
    }

    return query;
}
use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.
the class MailboxIndex method existsInContacts.
/**
 * Returns true if any of the specified email addresses exists in contacts, otherwise false.
 *
 * <p>Deferred contact items are indexed first when there are any; a failure to do so is logged and the
 * lookup proceeds against whatever is already indexed. Addresses that are null or empty are skipped.
 *
 * @param addrs addresses to look up in the contact data of the index
 * @return true as soon as one address is found as an exact (lower-cased) term, false if none is found
 * @throws IOException if opening or reading the index fails
 */
public boolean existsInContacts(Collection<InternetAddress> addrs) throws IOException {
    Set<MailItem.Type> types = EnumSet.of(MailItem.Type.CONTACT);
    if (getDeferredCount(types) > 0) {
        try {
            indexDeferredItems(types, new BatchStatus(), false);
        } catch (ServiceException e) {
            // Best effort: continue with the items that are already indexed.
            ZimbraLog.index.error("Failed to index deferred items", e);
        }
    }
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    try {
        for (InternetAddress addr : addrs) {
            String address = addr.getAddress();
            if (Strings.isNullOrEmpty(address)) {
                continue;
            }
            // Lower-case independently of the JVM default locale: with a Turkish default locale "I" would
            // become a dotless "ı" and the lookup could miss the indexed term.
            // NOTE(review): assumes indexed terms are lower-cased locale-independently - confirm against
            // the analyzer.
            String lcAddr = address.toLowerCase(java.util.Locale.ROOT);
            TermFieldEnumeration values = null;
            try {
                // The enumeration starts at lcAddr, so only its first element can be an exact match.
                values = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTACT_DATA, lcAddr);
                if (values.hasMoreElements()) {
                    BrowseTerm term = values.nextElement();
                    if (term != null && lcAddr.equals(term.getText())) {
                        ZimbraLog.index.debug("Contact = %s present in indexed items", lcAddr);
                        return true;
                    }
                }
            } finally {
                Closeables.closeQuietly(values);
            }
        }
        return false;
    } finally {
        Closeables.closeQuietly(searcher);
    }
}
Aggregations