Examples with IndexDocument - com.zimbra.cs.index.IndexDocument

Example 1 with IndexDocument

use of com.zimbra.cs.index.IndexDocument in project zm-mailbox by Zimbra.

the class Document method generateIndexData.

/**
 * Builds the index data for this document from its stored blob.
 *
 * @return a list holding the single parsed index document, or an empty list when parsing
 *         produced no document or failed with an I/O or service error (the failure is logged)
 * @throws TemporaryIndexingException if the blob cannot be fetched or the parsed document
 *         reports a temporary analysis failure
 */
@Override
public List<IndexDocument> generateIndexData() throws TemporaryIndexingException {
    try {
        MailboxBlob blob = getBlob();
        if (blob == null) {
            ZimbraLog.index.warn("Unable to fetch blob for Document id=%d,ver=%d,vol=%s", mId, mVersion, getLocator());
            throw new MailItem.TemporaryIndexingException();
        }

        ParsedDocument parsed = new ParsedDocument(blob.getLocalBlob(), getName(), getContentType(), getChangeDate(), getCreator(), getDescription(), isDescriptionEnabled());
        if (parsed.hasTemporaryAnalysisFailure()) {
            throw new MailItem.TemporaryIndexingException();
        }

        IndexDocument indexDoc = parsed.getDocument();
        if (indexDoc == null) {
            // Nothing to index for this item.
            return new ArrayList<IndexDocument>(0);
        }
        List<IndexDocument> result = new ArrayList<IndexDocument>(1);
        result.add(indexDoc);
        return result;
    } catch (IOException e) {
        // Non-temporary failure: log it and leave the item unindexed.
        ZimbraLog.index.warn("Error generating index data for Wiki Document " + getId() + ". Item will not be indexed", e);
        return new ArrayList<IndexDocument>(0);
    } catch (ServiceException e) {
        // Same treatment as the I/O failure above.
        ZimbraLog.index.warn("Error generating index data for Wiki Document " + getId() + ". Item will not be indexed", e);
        return new ArrayList<IndexDocument>(0);
    }
}

Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) MailboxBlob(com.zimbra.cs.store.MailboxBlob) ParsedDocument(com.zimbra.cs.mime.ParsedDocument) ServiceException(com.zimbra.common.service.ServiceException) ArrayList(java.util.ArrayList) IOException(java.io.IOException)

Example 2 with IndexDocument

use of com.zimbra.cs.index.IndexDocument in project zm-mailbox by Zimbra.

the class ParsedMessage method analyzePart.

/**
 * Analyzes one MIME part: runs its MIME handler, remembers the first iCalendar it finds, and,
 * when attachment indexing is on, queues a separate index document for the part.
 * <p>
 * {@code MimeHandlerException} and {@code ObjectHandlerException} are not propagated; they are
 * passed to {@code handleParseError} and whatever text was extracted up to that point is returned.
 *
 * @param isMainBody whether this part is being analyzed as the main body; a main-body part
 *        contributes its text to the result in cases where an attachment would not
 * @param mpi the MIME part to analyze
 * @return Extracted toplevel text (any text that should go into the toplevel indexed document);
 *         the empty string for multipart container parts or when no text was extracted
 * @throws MessagingException NOTE(review): presumably raised by the MIME part accessors used
 *         here (content type, data handler, size, input stream) - confirm
 * @throws ServiceException NOTE(review): source not visible in this method - confirm which callee
 */
private String analyzePart(boolean isMainBody, MPartInfo mpi) throws MessagingException, ServiceException {
    // Decide whether calendar data in this part should be ignored: always once a calendar part
    // has been recorded, otherwise whatever isBouncedCalendar() reports for this part.
    boolean ignoreCalendar;
    if (calendarPartInfo == null) {
        ignoreCalendar = isBouncedCalendar(mpi);
    } else {
        ignoreCalendar = true;
    }
    // A part whose Content-Type has no "method" parameter is also ignored as a calendar,
    // unless the calendar_allow_invite_without_method setting permits it.
    String methodParam = (new ContentType(mpi.getMimePart().getContentType())).getParameter("method");
    if (methodParam == null && !LC.calendar_allow_invite_without_method.booleanValue()) {
        ignoreCalendar = true;
    }
    String toRet = "";
    try {
        // ignore multipart "container" parts
        if (mpi.isMultipart()) {
            return toRet;
        }
        String ctype = mpi.getContentType();
        MimeHandler handler = MimeHandlerManager.getMimeHandler(ctype, mpi.getFilename());
        // Only checked when assertions are enabled (-ea).
        assert (handler != null);
        handler.setDefaultCharset(defaultCharset);
        Mime.repairTransferEncoding(mpi.getMimePart());
        if (handler.isIndexingEnabled()) {
            handler.init(mpi.getMimePart().getDataHandler().getDataSource());
            handler.setPartName(mpi.getPartName());
            handler.setFilename(mpi.getFilename());
            handler.setSize(mpi.getSize());
            // remember the first iCalendar attachment
            if (!ignoreCalendar && calendarPartInfo == null) {
                ZVCalendar cal = handler.getICalendar();
                if (cal != null) {
                    setCalendarPartInfo(mpi, cal);
                }
            }
            // Pull this part's text into the toplevel content when either:
            // - it is the main body and the handler does not run externally or indexAttachments is set
            // - IndexAttachments was set and !disableIndexingAttachmentsTogether
            if ((isMainBody && (!handler.runsExternally() || indexAttachments)) || (indexAttachments && !DebugConfig.disableIndexingAttachmentsTogether)) {
                toRet = handler.getContent();
            }
            if (indexAttachments && !DebugConfig.disableIndexingAttachmentsSeparately) {
                // Each non-text MIME part is also indexed as a separate
                // Lucene document.  This is necessary so that we can tell the
                // client what parts match if a search matched a particular
                // part.
                IndexDocument doc = new IndexDocument(handler.getDocument());
                String filename = handler.getFilename();
                if (!Strings.isNullOrEmpty(filename)) {
                    filenames.add(filename);
                }
                doc.addSortSize(mpi.getMimePart().getSize());
                luceneDocuments.add(setLuceneHeadersFromContainer(doc));
            }
        }
        // Fallback for text/calendar parts when no calendar has been recorded yet: parse the
        // part directly instead of relying on the handler.
        if (!ignoreCalendar && calendarPartInfo == null && ctype.equals(MimeConstants.CT_TEXT_CALENDAR)) {
            // Reaching this point with indexing enabled means the handler above did not yield
            // a calendar for a text/calendar part, which is reported as a misconfiguration.
            if (handler.isIndexingEnabled()) {
                ZimbraLog.index.warn("TextCalendarHandler not correctly installed");
            }
            InputStream is = null;
            try {
                // Use the part's declared charset, falling back to the default when absent or blank.
                String charset = mpi.getContentTypeParameter(MimeConstants.P_CHARSET);
                if (charset == null || charset.trim().isEmpty()) {
                    charset = MimeConstants.P_CHARSET_DEFAULT;
                }
                is = mpi.getMimePart().getInputStream();
                ZVCalendar cal = ZCalendarBuilder.build(is, charset);
                if (cal != null) {
                    setCalendarPartInfo(mpi, cal);
                }
            } catch (IOException ioe) {
                // A read failure here is logged only; analysis of the part still completes.
                ZimbraLog.index.warn("error reading text/calendar mime part", ioe);
            } finally {
                ByteUtil.closeStream(is);
            }
        }
    } catch (MimeHandlerException e) {
        // Handler failures are recorded via handleParseError rather than propagated.
        handleParseError(mpi, e);
    } catch (ObjectHandlerException e) {
        handleParseError(mpi, e);
    }
    return toRet;
}

Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) ZVCalendar(com.zimbra.common.calendar.ZCalendar.ZVCalendar) ContentType(com.zimbra.common.mime.ContentType) GZIPInputStream(java.util.zip.GZIPInputStream) SharedInputStream(javax.mail.internet.SharedInputStream) SharedByteArrayInputStream(javax.mail.util.SharedByteArrayInputStream) BlobInputStream(com.zimbra.cs.store.BlobInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) ObjectHandlerException(com.zimbra.cs.object.ObjectHandlerException)

Example 3 with IndexDocument

use of com.zimbra.cs.index.IndexDocument in project zm-mailbox by Zimbra.

the class ParsedContact method getPrimaryDocument.

/**
 * Builds the primary index document for this contact.
 * <p>
 * The contact's email addresses go into the "To" field, its file-as string into the "From"
 * field, a fixed set of name/company fields into the contact-data field, and every retained
 * field as a key/value pair into the structured field stream.
 *
 * @param acct account used to look up which contact fields hold email addresses
 * @param contentStrIn extra text appended after the contact's own content
 * @return the populated index document
 * @throws ServiceException NOTE(review): presumably from the Contact lookups - confirm
 */
private IndexDocument getPrimaryDocument(Account acct, String contentStrIn) throws ServiceException {
    StringBuilder otherText = new StringBuilder();
    String[] emailFields = Contact.getEmailFields(acct);
    FieldTokenStream fieldTokens = new FieldTokenStream();

    for (Map.Entry<String, String> entry : getFields().entrySet()) {
        String name = entry.getKey();
        // Ignore these fields as they can either be too big or contain encoded data.
        boolean excluded = Contact.isSMIMECertField(name)
                || ContactConstants.A_member.equals(name)
                || ContactConstants.A_groupMember.equals(name);
        if (excluded) {
            continue;
        }
        String value = entry.getValue();
        // Email addresses are added to the content separately below; fileAs is left out of
        // the free text as well.
        if (!Contact.isEmailField(emailFields, name) && !ContactConstants.A_fileAs.equalsIgnoreCase(name)) {
            otherText.append(value).append(' ');
        }
        fieldTokens.add(name, value);
    }

    // Gather all of this contact's email addresses into one comma-terminated string.
    // Contact group members are not dereferenced because it's only confusing when searching.
    StringBuilder addressList = new StringBuilder();
    for (String address : Contact.getEmailAddresses(emailFields, getFields(), DerefGroupMembersOption.NONE)) {
        addressList.append(address).append(',');
    }
    RFC822AddressTokenStream to = new RFC822AddressTokenStream(addressList.toString());
    String emailStrTokens = StringUtil.join(" ", to.getAllTokens());

    StringBuilder searchText = new StringBuilder(emailStrTokens).append(' ');
    appendContactField(searchText, this, ContactConstants.A_company);
    appendContactField(searchText, this, ContactConstants.A_phoneticCompany);
    appendContactField(searchText, this, ContactConstants.A_firstName);
    appendContactField(searchText, this, ContactConstants.A_phoneticFirstName);
    appendContactField(searchText, this, ContactConstants.A_lastName);
    appendContactField(searchText, this, ContactConstants.A_phoneticLastName);
    appendContactField(searchText, this, ContactConstants.A_nickname);
    appendContactField(searchText, this, ContactConstants.A_fullName);

    // Email tokens go FIRST in the content so that they score higher in search than the
    // remaining text.
    StringBuilder fullContent = new StringBuilder(emailStrTokens)
            .append(' ')
            .append(otherText)
            .append(' ')
            .append(contentStrIn);

    IndexDocument primary = new IndexDocument();
    /* put the email addresses in the "To" field so they can be more easily searched */
    primary.addTo(to);
    /* put the name in the "From" field since the MailItem table uses 'Sender' */
    primary.addFrom(new RFC822AddressTokenStream(Contact.getFileAsString(contactFields)));
    /* bug 11831 - put contact searchable data in its own field so wildcard search works better */
    primary.addContactData(searchText.toString());
    primary.addContent(fullContent.toString());
    primary.addPartName(LuceneFields.L_PARTNAME_CONTACT);
    // add key:value pairs to the structured FIELD Lucene field
    primary.addField(fieldTokens);
    return primary;
}

Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) FieldTokenStream(com.zimbra.cs.index.analysis.FieldTokenStream) RFC822AddressTokenStream(com.zimbra.cs.index.analysis.RFC822AddressTokenStream) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with IndexDocument

use of com.zimbra.cs.index.IndexDocument in project zm-mailbox by Zimbra.

the class ParsedContact method analyzeAttachment.

/**
 * Runs the MIME handler for one contact attachment and, when attachment indexing is on,
 * folds its text into the shared content and/or queues a separate index document for it.
 *
 * @param attach the attachment to analyze
 * @param contentText accumulator for toplevel content; the attachment text is appended to it,
 *        separated from existing text by a single space
 * @param indexAttachments whether attachment content should be indexed at all
 * @throws MimeHandlerException NOTE(review): presumably from the handler calls - confirm
 * @throws ObjectHandlerException NOTE(review): presumably from the handler calls - confirm
 * @throws ServiceException NOTE(review): source not visible in this method - confirm
 */
private void analyzeAttachment(Attachment attach, StringBuilder contentText, boolean indexAttachments) throws MimeHandlerException, ObjectHandlerException, ServiceException {
    String ctype = attach.getContentType();
    MimeHandler handler = MimeHandlerManager.getMimeHandler(ctype, attach.getFilename());
    assert (handler != null);
    if (!handler.isIndexingEnabled()) {
        return;
    }

    handler.init(attach);
    handler.setPartName(attach.getPartName());
    handler.setFilename(attach.getFilename());
    handler.setSize(attach.getSize());

    if (indexAttachments && !DebugConfig.disableIndexingAttachmentsTogether) {
        // Add ALL TEXT from EVERY PART to the toplevel body content, so that a multi-word
        // query can match when one word is in the body and another in a sub-attachment.
        // With attachment indexing disabled, only the main body and text parts are added.
        //
        // The separator is appended before the content is fetched, as in the chained form.
        contentText.append(contentText.length() == 0 ? "" : " ");
        contentText.append(handler.getContent());
    }

    if (indexAttachments && !DebugConfig.disableIndexingAttachmentsSeparately) {
        // Each non-text MIME part is also indexed as its own Lucene document so that the
        // client can be told which part matched a search.
        org.apache.lucene.document.Document luceneDoc = handler.getDocument();
        if (luceneDoc != null) {
            IndexDocument partDoc = new IndexDocument(luceneDoc);
            partDoc.addSortSize(attach.getSize());
            indexDocs.add(partDoc);
        }
    }
}

Also used : IndexDocument(com.zimbra.cs.index.IndexDocument)

Example 5 with IndexDocument

use of com.zimbra.cs.index.IndexDocument in project zm-mailbox by Zimbra.

the class ParsedMessage method handleParseError.

/**
 * Logs a part-level parse failure and indexes a minimal placeholder document for the part
 * (MIME type, part name, filename and, when available, size).
 * <p>
 * Also bumps the parse-error counter and flags a temporary analysis failure when
 * {@code ConversionException.isTemporaryCauseOf(error)} reports one.
 *
 * @param mpi MIME info of the part that failed to parse
 * @param error the failure being handled
 */
private void handleParseError(MPartInfo mpi, Throwable error) {
    numParseErrors++;
    LOG.warn("Unable to parse part=%s filename=%s content-type=%s message-id=%s",
            mpi.getPartName(), mpi.getFilename(), mpi.getContentType(), getMessageID(), error);

    if (ConversionException.isTemporaryCauseOf(error)) {
        temporaryAnalysisFailure = true;
    }

    String filename = mpi.getFilename();
    if (!Strings.isNullOrEmpty(filename)) {
        filenames.add(filename);
    }

    IndexDocument placeholder = new IndexDocument(new Document());
    placeholder.addMimeType(new MimeTypeTokenStream(mpi.getContentType()));
    placeholder.addPartName(mpi.getPartName());
    placeholder.addFilename(filename);
    try {
        placeholder.addSortSize(mpi.getMimePart().getSize());
    } catch (MessagingException ignore) {
        // Best effort: the placeholder is still indexed, just without a sort size.
    }
    luceneDocuments.add(setLuceneHeadersFromContainer(placeholder));
}

Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) MessagingException(javax.mail.MessagingException) MimeTypeTokenStream(com.zimbra.cs.index.analysis.MimeTypeTokenStream) Document(org.apache.lucene.document.Document)

Aggregations

IndexDocument (com.zimbra.cs.index.IndexDocument): 14; RFC822AddressTokenStream (com.zimbra.cs.index.analysis.RFC822AddressTokenStream): 5; IOException (java.io.IOException): 5; ServiceException (com.zimbra.common.service.ServiceException): 4; Document (org.apache.lucene.document.Document): 4; FieldTokenStream (com.zimbra.cs.index.analysis.FieldTokenStream): 3; MimeTypeTokenStream (com.zimbra.cs.index.analysis.MimeTypeTokenStream): 3; ObjectHandlerException (com.zimbra.cs.object.ObjectHandlerException): 3; ArrayList (java.util.ArrayList): 3; MailServiceException (com.zimbra.cs.mailbox.MailServiceException): 2; ParsedMessage (com.zimbra.cs.mime.ParsedMessage): 2; MessagingException (javax.mail.MessagingException): 2; MimeMessage (javax.mail.internet.MimeMessage): 2; Test (org.junit.Test): 2; ZVCalendar (com.zimbra.common.calendar.ZCalendar.ZVCalendar): 1; ContentType (com.zimbra.common.mime.ContentType): 1; ZMimeMessage (com.zimbra.common.zmime.ZMimeMessage): 1; ZMimeMultipart (com.zimbra.common.zmime.ZMimeMultipart): 1; Account (com.zimbra.cs.account.Account): 1; ConversionException (com.zimbra.cs.convert.ConversionException): 1