Search in sources :

Example 1 with TermFieldEnumeration

use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.

the class AbstractIndexStoreTest method termEnum.

/**
 * Verifies the sorted term enumeration returned by {@code getTermsForField} for several index fields
 * after indexing two contacts. The result of getTermsForField can be good for seeing the effects of
 * {@code ZimbraAnalyzer} on how fields get tokenized.
 * TODO:  Add tests for different types of tokenizers.
 *
 * @throws Exception if mailbox setup, indexing or any term check fails
 */
@Test
public void termEnum() throws Exception {
    ZimbraLog.test.debug("--->TEST termEnum");
    Mailbox mbox = MailboxManager.getInstance().getMailboxByAccountId(MockProvisioning.DEFAULT_ACCOUNT_ID);
    createContact(mbox, "teSt1@ziMBRA.com");
    createContact(mbox, "test2@zimbra.com");
    // Make sure all indexing has been done
    mbox.index.indexDeferredItems();
    IndexStore index = mbox.index.getIndexStore();
    ZimbraIndexSearcher searcher = index.openSearcher();
    // The searcher is released in the outer finally so that a failed check does not leak it.
    try {
        // Note that TermFieldEnumeration order is defined to be sorted
        TermFieldEnumeration fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTACT_DATA, "");
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "@zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTACT_DATA, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_CONTACT_DATA);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            // l.content values:
            // "test1@zimbra.com test1 @zimbra.com zimbra.com zimbra @zimbra  "
            // "test2@zimbra.com test2 @zimbra.com zimbra.com zimbra @zimbra  "
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTENT, "");
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_CONTENT, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_CONTENT);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_FIELD, "");
            checkNextTerm(fields, new Term(LuceneFields.L_FIELD, "email:test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_FIELD, "email:test2@zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_FIELD);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_PARTNAME, "");
            checkNextTerm(fields, new Term(LuceneFields.L_PARTNAME, "CONTACT"));
            checkAtEnd(fields, LuceneFields.L_PARTNAME);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_H_TO, "");
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "@zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_H_TO);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            // Starting from "tess" skips the "@..." terms that sort before it.
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_H_TO, "tess");
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test1@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "test2@zimbra.com"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra"));
            checkNextTerm(fields, new Term(LuceneFields.L_H_TO, "zimbra.com"));
            checkAtEnd(fields, LuceneFields.L_H_TO + "(sublist)");
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_SORT_DATE, "");
            checkNextTermFieldType(fields, LuceneFields.L_SORT_DATE);
            // TODO:  ElasticSearch has more.  Not sure why and not sure it matters
            // checkAtEnd(fields, LuceneFields.L_SORT_DATE);
        } finally {
            Closeables.closeQuietly(fields);
        }
        fields = null;
        try {
            fields = searcher.getIndexReader().getTermsForField(LuceneFields.L_MAILBOX_BLOB_ID, "");
            checkNextTermFieldType(fields, LuceneFields.L_MAILBOX_BLOB_ID);
            checkNextTermFieldType(fields, LuceneFields.L_MAILBOX_BLOB_ID);
            // TODO:  ElasticSearch has more.  Investigate?  Believe it relates to fact that is a number field
            // Numbers have an associated precision step (number of terms generated for each number value)
            // which defaults to 4.
            // checkAtEnd(fields, LuceneFields.L_MAILBOX_BLOB_ID);
        } finally {
            Closeables.closeQuietly(fields);
        }
    } finally {
        Closeables.closeQuietly(searcher);
    }
}
Also used : Mailbox(com.zimbra.cs.mailbox.Mailbox) TermFieldEnumeration(com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration) Term(org.apache.lucene.index.Term) Test(org.junit.Test)

Example 2 with TermFieldEnumeration

use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.

the class MailboxIndex method getAttachmentTypes.

/**
 * Returns all attachment types from the index.
 *
 * @param regex matching pattern or null to match everything (an empty string is treated like null)
 * @return {@link BrowseTerm}s which correspond to all of the attachment types in the index
 * @throws IOException declared by the index access calls used here
 * @throws ServiceException declared by the index access / bounded regex matching calls used here
 */
public List<BrowseTerm> getAttachmentTypes(String regex) throws IOException, ServiceException {
    Pattern pattern = Strings.isNullOrEmpty(regex) ? null : Pattern.compile(regex);
    List<BrowseTerm> result = new ArrayList<BrowseTerm>();
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    TermFieldEnumeration values = null;
    try {
        values = searcher.getIndexReader().getTermsForField(LuceneFields.L_ATTACHMENTS, "");
        while (values.hasMoreElements()) {
            BrowseTerm term = values.nextElement();
            // Other callers of this enumeration treat a null element as end-of-data;
            // do the same here instead of failing with a NullPointerException.
            if (term == null) {
                break;
            }
            if (pattern == null || AccessBoundedRegex.matches(term.getText(), pattern, MAX_REGEX_ACCESSES)) {
                result.add(term);
            }
        }
    } finally {
        // Release the enumeration before the searcher that produced it.
        Closeables.closeQuietly(values);
        Closeables.closeQuietly(searcher);
    }
    return result;
}
Also used : Pattern(java.util.regex.Pattern) BrowseTerm(com.zimbra.cs.index.BrowseTerm) TermFieldEnumeration(com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration) ArrayList(java.util.ArrayList) ZimbraIndexSearcher(com.zimbra.cs.index.ZimbraIndexSearcher)

Example 3 with TermFieldEnumeration

use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.

the class MailboxIndex method getDomains.

/**
 * Collects the domain names found among the terms of one index field.
 *
 * <p>Only terms that begin with '@' and contain a '.' are considered; the leading '@' is stripped
 * from the text of each returned term.
 *
 * @param field Lucene field name (e.g. LuceneFields.L_H_CC)
 * @param regex matching pattern or null/empty to match everything; a leading '@' is prepended
 *              when the pattern does not already start with one
 * @return {@link BrowseTerm}s which correspond to all of the domain terms stored in a given field
 */
public List<BrowseTerm> getDomains(String field, String regex) throws IOException, ServiceException {
    Pattern domainPattern = null;
    if (!Strings.isNullOrEmpty(regex)) {
        String anchored = regex.startsWith("@") ? regex : "@" + regex;
        domainPattern = Pattern.compile(anchored);
    }
    List<BrowseTerm> domains = new ArrayList<BrowseTerm>();
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    TermFieldEnumeration terms = null;
    try {
        terms = searcher.getIndexReader().getTermsForField(field, "");
        while (terms.hasMoreElements()) {
            BrowseTerm candidate = terms.nextElement();
            if (candidate == null) {
                break;
            }
            String token = candidate.getText();
            // Domains are tokenized with '@' prefix. Exclude partial domain tokens.
            if (!token.startsWith("@") || !token.contains(".")) {
                continue;
            }
            if (domainPattern != null && !AccessBoundedRegex.matches(token, domainPattern, MAX_REGEX_ACCESSES)) {
                continue;
            }
            domains.add(new BrowseTerm(token.substring(1), candidate.getFreq()));
        }
    } finally {
        Closeables.closeQuietly(terms);
        Closeables.closeQuietly(searcher);
    }
    return domains;
}
Also used : Pattern(java.util.regex.Pattern) BrowseTerm(com.zimbra.cs.index.BrowseTerm) TermFieldEnumeration(com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration) ArrayList(java.util.ArrayList) ZimbraIndexSearcher(com.zimbra.cs.index.ZimbraIndexSearcher)

Example 4 with TermFieldEnumeration

use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.

the class LuceneQueryOperation method expandLazyMultiPhraseQuery.

/**
 * Expands the wildcard positions of any {@code LazyMultiPhraseQuery} found in the given query.
 *
 * <p>For a {@code LazyMultiPhraseQuery}, every single-term position listed in its {@code expand}
 * collection is replaced by the index terms that start with that term's text, capped at
 * {@code LC.zimbra_index_wildcard_max_terms_expanded}. For a {@code BooleanQuery}, each clause is
 * processed recursively and rewritten in place. Any other query is returned untouched.
 *
 * @param query query to expand
 * @return the expanded query, or null when a position to expand matched no index term, a required
 *         clause expanded to null, or a boolean query ended up with no clauses
 * @throws IOException declared by the index term lookup used here
 */
private Query expandLazyMultiPhraseQuery(Query query) throws IOException {
    if (query instanceof LazyMultiPhraseQuery) {
        LazyMultiPhraseQuery lazyQuery = (LazyMultiPhraseQuery) query;
        final int limit = LC.zimbra_index_wildcard_max_terms_expanded.intValue();
        MultiPhraseQuery expandedQuery = new MultiPhraseQuery();
        for (Term[] position : lazyQuery.getTermArrays()) {
            // Only single-term positions flagged for expansion are looked up in the index.
            boolean needsExpansion = position.length == 1 && lazyQuery.expand.contains(position[0]);
            if (!needsExpansion) {
                expandedQuery.add(position);
                continue;
            }
            Term prefix = position[0];
            List<Term> matches = Lists.newArrayList();
            TermFieldEnumeration candidates =
                    searcher.getIndexReader().getTermsForField(prefix.field(), prefix.text());
            try {
                while (candidates.hasMoreElements()) {
                    BrowseTerm candidate = candidates.nextElement();
                    // Stop at the first missing term or the first term that no longer shares the prefix.
                    if (candidate == null || !candidate.getText().startsWith(prefix.text())) {
                        break;
                    }
                    // too many terms expanded
                    if (matches.size() >= limit) {
                        break;
                    }
                    matches.add(new Term(prefix.field(), candidate.getText()));
                }
            } finally {
                Closeables.closeQuietly(candidates);
            }
            if (matches.isEmpty()) {
                return null;
            }
            expandedQuery.add(matches.toArray(new Term[matches.size()]));
        }
        return expandedQuery;
    }
    if (query instanceof BooleanQuery) {
        BooleanQuery booleanQuery = (BooleanQuery) query;
        ListIterator<BooleanClause> clauses = booleanQuery.clauses().listIterator();
        while (clauses.hasNext()) {
            BooleanClause clause = clauses.next();
            Query before = clause.getQuery();
            Query after = expandLazyMultiPhraseQuery(before);
            if (after == null) {
                if (clause.isRequired()) {
                    return null;
                }
                clauses.remove();
            } else if (after != before) {
                clause.setQuery(after);
            }
        }
        return booleanQuery.clauses().isEmpty() ? null : query;
    }
    return query;
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) TermFieldEnumeration(com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Term(org.apache.lucene.index.Term) ListIterator(java.util.ListIterator)

Example 5 with TermFieldEnumeration

use of com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration in project zm-mailbox by Zimbra.

the class MailboxIndex method existsInContacts.

/**
 * Returns true if any of the specified email addresses exists in contacts, otherwise false.
 *
 * <p>Deferred contact items are indexed first when any are pending; a failure of that step is logged
 * and the lookup proceeds against the index as it stands. Addresses that are null or empty are skipped.
 *
 * @param addrs addresses to look up in the contact data field of the index
 * @return true as soon as one address matches an indexed term exactly (after lower-casing), else false
 * @throws IOException declared by the index access calls used here
 */
public boolean existsInContacts(Collection<InternetAddress> addrs) throws IOException {
    Set<MailItem.Type> types = EnumSet.of(MailItem.Type.CONTACT);
    if (getDeferredCount(types) > 0) {
        try {
            indexDeferredItems(types, new BatchStatus(), false);
        } catch (ServiceException e) {
            // Best effort: keep going and search whatever is already indexed.
            ZimbraLog.index.error("Failed to index deferred items", e);
        }
    }
    ZimbraIndexSearcher searcher = indexStore.openSearcher();
    try {
        for (InternetAddress addr : addrs) {
            String address = addr.getAddress();
            if (Strings.isNullOrEmpty(address)) {
                continue;
            }
            // Lower-case independently of the JVM default locale; with e.g. a Turkish default locale
            // "I" would otherwise become a dotless "ı" and presumably never match the indexed term.
            String lcAddr = address.toLowerCase(java.util.Locale.ROOT);
            TermFieldEnumeration values = null;
            try {
                values = searcher.getIndexReader().getTermsForField(LuceneFields.L_CONTACT_DATA, lcAddr);
                // Only the first term returned for lcAddr is examined; it must equal lcAddr exactly.
                if (values.hasMoreElements()) {
                    BrowseTerm term = values.nextElement();
                    if (term != null && lcAddr.equals(term.getText())) {
                        ZimbraLog.index.debug("Contact = %s present in indexed items", lcAddr);
                        return true;
                    }
                }
            } finally {
                Closeables.closeQuietly(values);
            }
        }
        return false;
    } finally {
        Closeables.closeQuietly(searcher);
    }
}
Also used : Type(com.zimbra.cs.mailbox.MailItem.Type) InternetAddress(com.zimbra.common.mime.InternetAddress) BrowseTerm(com.zimbra.cs.index.BrowseTerm) ServiceException(com.zimbra.common.service.ServiceException) TermFieldEnumeration(com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration) ZimbraIndexSearcher(com.zimbra.cs.index.ZimbraIndexSearcher)

Aggregations

TermFieldEnumeration (com.zimbra.cs.index.ZimbraIndexReader.TermFieldEnumeration)6 BrowseTerm (com.zimbra.cs.index.BrowseTerm)4 ZimbraIndexSearcher (com.zimbra.cs.index.ZimbraIndexSearcher)4 ArrayList (java.util.ArrayList)3 Pattern (java.util.regex.Pattern)3 Term (org.apache.lucene.index.Term)2 InternetAddress (com.zimbra.common.mime.InternetAddress)1 ServiceException (com.zimbra.common.service.ServiceException)1 Type (com.zimbra.cs.mailbox.MailItem.Type)1 Mailbox (com.zimbra.cs.mailbox.Mailbox)1 ListIterator (java.util.ListIterator)1 BooleanClause (org.apache.lucene.search.BooleanClause)1 BooleanQuery (org.apache.lucene.search.BooleanQuery)1 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)1 Query (org.apache.lucene.search.Query)1 TermQuery (org.apache.lucene.search.TermQuery)1 Test (org.junit.Test)1