Search in sources :

Example 1 with ObjectHandlerException

use of com.zimbra.cs.object.ObjectHandlerException in project zm-mailbox by Zimbra.

the class ParsedMessage method analyzePart.

/**
 * Runs a single MIME part through its MIME handler, records the first usable iCalendar part,
 * and, when attachments are indexed separately, queues an index document for the part.
 * Handler failures are passed to {@code handleParseError} rather than propagated.
 *
 * @param isMainBody whether the part is being analyzed as main-body content
 * @param mpi the MIME part to analyze
 * @return Extracted toplevel text (any text that should go into the toplevel indexed document);
 *         the empty string for multipart containers or when no text was extracted
 * @throws MessagingException propagated from the underlying MIME calls
 * @throws ServiceException propagated from the underlying service calls
 */
private String analyzePart(boolean isMainBody, MPartInfo mpi) throws MessagingException, ServiceException {
    // Once a calendar part has been recorded every later part is ignored for calendar purposes;
    // until then the bounced-calendar check decides.
    boolean ignoreCalendar = calendarPartInfo != null || isBouncedCalendar(mpi);
    ContentType partContentType = new ContentType(mpi.getMimePart().getContentType());
    if (partContentType.getParameter("method") == null && !LC.calendar_allow_invite_without_method.booleanValue()) {
        // no "method" parameter, and invites without one are not allowed by configuration
        ignoreCalendar = true;
    }

    String topLevelText = "";
    try {
        // multipart "container" parts are skipped
        if (mpi.isMultipart()) {
            return topLevelText;
        }

        String contentType = mpi.getContentType();
        MimeHandler mimeHandler = MimeHandlerManager.getMimeHandler(contentType, mpi.getFilename());
        assert (mimeHandler != null);
        mimeHandler.setDefaultCharset(defaultCharset);
        Mime.repairTransferEncoding(mpi.getMimePart());

        if (mimeHandler.isIndexingEnabled()) {
            mimeHandler.init(mpi.getMimePart().getDataHandler().getDataSource());
            mimeHandler.setPartName(mpi.getPartName());
            mimeHandler.setFilename(mpi.getFilename());
            mimeHandler.setSize(mpi.getSize());

            // keep only the first iCalendar attachment
            if (!ignoreCalendar && calendarPartInfo == null) {
                ZVCalendar handlerCalendar = mimeHandler.getICalendar();
                if (handlerCalendar != null) {
                    setCalendarPartInfo(mpi, handlerCalendar);
                }
            }

            // Text goes into the toplevel document either because this is main-body content
            // (handled in-process, or attachment indexing is on), or because attachments are
            // indexed together with the message.
            boolean indexAsMainBody = isMainBody && (!mimeHandler.runsExternally() || indexAttachments);
            if (indexAsMainBody || (indexAttachments && !DebugConfig.disableIndexingAttachmentsTogether)) {
                topLevelText = mimeHandler.getContent();
            }

            if (indexAttachments && !DebugConfig.disableIndexingAttachmentsSeparately) {
                // The part also gets its own Lucene document, so that a search hit can be
                // attributed to the specific part that matched.
                IndexDocument partDoc = new IndexDocument(mimeHandler.getDocument());
                String handlerFilename = mimeHandler.getFilename();
                if (!Strings.isNullOrEmpty(handlerFilename)) {
                    filenames.add(handlerFilename);
                }
                partDoc.addSortSize(mpi.getMimePart().getSize());
                luceneDocuments.add(setLuceneHeadersFromContainer(partDoc));
            }
        }

        // Fallback: if no calendar has been recorded yet and this is a text/calendar part,
        // parse it directly from the part's stream.
        if (!ignoreCalendar && calendarPartInfo == null && contentType.equals(MimeConstants.CT_TEXT_CALENDAR)) {
            if (mimeHandler.isIndexingEnabled()) {
                ZimbraLog.index.warn("TextCalendarHandler not correctly installed");
            }
            InputStream calendarStream = null;
            try {
                String charset = mpi.getContentTypeParameter(MimeConstants.P_CHARSET);
                if (charset == null || charset.trim().isEmpty()) {
                    charset = MimeConstants.P_CHARSET_DEFAULT;
                }
                calendarStream = mpi.getMimePart().getInputStream();
                ZVCalendar parsedCalendar = ZCalendarBuilder.build(calendarStream, charset);
                if (parsedCalendar != null) {
                    setCalendarPartInfo(mpi, parsedCalendar);
                }
            } catch (IOException ioe) {
                ZimbraLog.index.warn("error reading text/calendar mime part", ioe);
            } finally {
                ByteUtil.closeStream(calendarStream);
            }
        }
    } catch (MimeHandlerException e) {
        handleParseError(mpi, e);
    } catch (ObjectHandlerException e) {
        handleParseError(mpi, e);
    }
    return topLevelText;
}
Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) ZVCalendar(com.zimbra.common.calendar.ZCalendar.ZVCalendar) ContentType(com.zimbra.common.mime.ContentType) GZIPInputStream(java.util.zip.GZIPInputStream) SharedInputStream(javax.mail.internet.SharedInputStream) SharedByteArrayInputStream(javax.mail.util.SharedByteArrayInputStream) BlobInputStream(com.zimbra.cs.store.BlobInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) ObjectHandlerException(com.zimbra.cs.object.ObjectHandlerException)

Example 2 with ObjectHandlerException

use of com.zimbra.cs.object.ObjectHandlerException in project zm-mailbox by Zimbra.

the class ParsedContact method analyzeContact.

/**
 * Builds the index documents for this contact. Does nothing if {@code indexDocs} has already
 * been populated by an earlier call.
 *
 * <p>Each contact attachment is analyzed in turn; a parse failure on one attachment is logged
 * and does not stop the remaining attachments from being processed. If a
 * {@link MimeHandlerException} is reported as a temporary conversion failure,
 * {@code mHasTemporaryAnalysisFailure} is set.
 *
 * @param acct the account passed through to {@code getPrimaryDocument}
 * @param indexAttachments passed through to {@code analyzeAttachment}
 * @throws ServiceException propagated from the underlying calls
 */
private void analyzeContact(Account acct, boolean indexAttachments) throws ServiceException {
    if (indexDocs != null) {
        // already analyzed
        return;
    }
    indexDocs = new ArrayList<IndexDocument>();
    StringBuilder attachContent = new StringBuilder();
    if (contactAttachments != null) {
        for (Attachment attach : contactAttachments) {
            try {
                analyzeAttachment(attach, attachContent, indexAttachments);
            } catch (MimeHandlerException e) {
                ZimbraLog.index.warn("Parse error on attachment " + attach.getPartName() + " (" + attach.getContentType() + ")", e);
                // Only the flag is observable to callers; the ServiceException that used to be
                // built here was never thrown or read, so it is no longer created.
                if (!mHasTemporaryAnalysisFailure && ConversionException.isTemporaryCauseOf(e)) {
                    mHasTemporaryAnalysisFailure = true;
                }
            } catch (ObjectHandlerException e) {
                ZimbraLog.index.warn("Parse error on attachment " + attach.getPartName() + " (" + attach.getContentType() + ")", e);
            }
        }
    }
    indexDocs.add(getPrimaryDocument(acct, attachContent.toString()));
}
Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) ServiceException(com.zimbra.common.service.ServiceException) MailServiceException(com.zimbra.cs.mailbox.MailServiceException) Attachment(com.zimbra.cs.mailbox.Contact.Attachment) ObjectHandlerException(com.zimbra.cs.object.ObjectHandlerException)

Example 3 with ObjectHandlerException

use of com.zimbra.cs.object.ObjectHandlerException in project zm-mailbox by Zimbra.

the class ParsedMessage method getMainBodyLuceneDocument.

/**
 * Builds the top-level index document for the message.
 *
 * <p>The document receives the from/to/cc token streams, the {@code X-Envelope-From} and
 * {@code X-Envelope-To} headers, the message id with surrounding angle brackets removed, every
 * header of the message and of its immediate body parts (when the content is a
 * {@link ZMimeMultipart}), the subject, and a content field made of the subject, the address
 * tokens, the attachment filenames and {@code fullContent}.
 *
 * @param fullContent text already extracted from the message parts; appended to the content field
 * @return the populated index document
 * @throws MessagingException propagated from the underlying MIME calls
 * @throws ServiceException propagated from the underlying service calls
 */
private IndexDocument getMainBodyLuceneDocument(StringBuilder fullContent) throws MessagingException, ServiceException {
    IndexDocument doc = new IndexDocument(new Document());
    doc.addMimeType(new MimeTypeTokenStream("message/rfc822"));
    doc.addPartName(LuceneFields.L_PARTNAME_TOP);
    doc.addFrom(getFromTokenStream());
    doc.addTo(getToTokenStream());
    doc.addCc(getCcTokenStream());
    try {
        doc.addEnvFrom(new RFC822AddressTokenStream(getMimeMessage().getHeader("X-Envelope-From", ",")));
    } catch (MessagingException ignore) {
        // best effort: if the header cannot be read the field is simply not added
    }
    try {
        doc.addEnvTo(new RFC822AddressTokenStream(getMimeMessage().getHeader("X-Envelope-To", ",")));
    } catch (MessagingException ignore) {
        // best effort: if the header cannot be read the field is simply not added
    }
    // Strip one leading '<' and one trailing '>' from the message id. startsWith/endsWith are
    // safe on the empty string, so a header value of just "<" no longer triggers a
    // StringIndexOutOfBoundsException when checking the last character.
    String msgId = Strings.nullToEmpty(Mime.getHeader(getMimeMessage(), "message-id"));
    if (msgId.startsWith("<")) {
        msgId = msgId.substring(1);
    }
    if (msgId.endsWith(">")) {
        msgId = msgId.substring(0, msgId.length() - 1);
    }
    if (!msgId.isEmpty()) {
        doc.addMessageId(msgId);
    }
    // iterate all the message headers, add them to the structured-field data in the index
    FieldTokenStream fields = new FieldTokenStream();
    MimeMessage mm = getMimeMessage();
    List<Part> parts = new ArrayList<Part>();
    parts.add(mm);
    try {
        // fetch the content once and reuse it for both the type check and the cast
        Object content = mm.getContent();
        if (content instanceof ZMimeMultipart) {
            ZMimeMultipart multipart = (ZMimeMultipart) content;
            int numParts = multipart.getCount();
            for (int i = 0; i < numParts; i++) {
                parts.add(multipart.getBodyPart(i));
            }
        }
    } catch (IOException ignore) {
        // best effort: if the content cannot be read, only the headers of the parts
        // collected so far are indexed
    }
    for (Part part : parts) {
        Enumeration<?> en = part.getAllHeaders();
        while (en.hasMoreElements()) {
            Header h = (Header) en.nextElement();
            String key = h.getName().trim();
            String value = h.getValue();
            if (value != null) {
                value = MimeUtility.unfold(value).trim();
            } else {
                value = "";
            }
            if (key.length() > 0) {
                if (value.length() == 0) {
                    // low-level tokenizer can't deal with blank header value, so we'll index
                    // some dummy value just so the header appears in the index.
                    // Users can query for the existence of the header with a query
                    // like #headername:*
                    fields.add(key, "_blank_");
                } else {
                    fields.add(key, value);
                }
            }
        }
    }
    // add key:value pairs to the structured FIELD lucene field
    doc.addField(fields);
    String subject = getSubject();
    doc.addSubject(subject);
    // add subject and from to main content for better searching
    StringBuilder contentPrepend = new StringBuilder(subject);
    // Bug 583: add all of the TOKENIZED versions of the email addresses to our CONTENT field...
    appendToContent(contentPrepend, StringUtil.join(" ", getFromTokenStream().getAllTokens()));
    appendToContent(contentPrepend, StringUtil.join(" ", getToTokenStream().getAllTokens()));
    appendToContent(contentPrepend, StringUtil.join(" ", getCcTokenStream().getAllTokens()));
    // bug 33461: add filenames to our CONTENT field
    for (String fn : filenames) {
        appendToContent(contentPrepend, ZimbraAnalyzer.getAllTokensConcatenated(LuceneFields.L_FILENAME, fn));
        // also add the non-tokenized form, so full-filename searches match
        appendToContent(contentPrepend, fn);
    }
    String text = contentPrepend.toString() + " " + fullContent.toString();
    doc.addContent(text);
    try {
        MimeHandler.getObjects(text, doc);
    } catch (ObjectHandlerException e) {
        // object recognition is optional; the document is still returned without it
        ZimbraLog.index.warn("Unable to recognize searchable objects in message: msgid=%s,subject=%s", getMessageID(), getSubject(), e);
    }
    // Get the list of attachment content types from this message and any TNEF attachments
    doc.addAttachments(new MimeTypeTokenStream(Mime.getAttachmentTypeList(messageParts)));
    return doc;
}
Also used : IndexDocument(com.zimbra.cs.index.IndexDocument) MessagingException(javax.mail.MessagingException) MimeTypeTokenStream(com.zimbra.cs.index.analysis.MimeTypeTokenStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) IndexDocument(com.zimbra.cs.index.IndexDocument) RFC822AddressTokenStream(com.zimbra.cs.index.analysis.RFC822AddressTokenStream) Header(javax.mail.Header) ZMimeMessage(com.zimbra.common.zmime.ZMimeMessage) MimeMessage(javax.mail.internet.MimeMessage) Part(javax.mail.Part) FieldTokenStream(com.zimbra.cs.index.analysis.FieldTokenStream) ZMimeMultipart(com.zimbra.common.zmime.ZMimeMultipart) ObjectHandlerException(com.zimbra.cs.object.ObjectHandlerException)

Aggregations

IndexDocument (com.zimbra.cs.index.IndexDocument)3 ObjectHandlerException (com.zimbra.cs.object.ObjectHandlerException)3 IOException (java.io.IOException)2 ZVCalendar (com.zimbra.common.calendar.ZCalendar.ZVCalendar)1 ContentType (com.zimbra.common.mime.ContentType)1 ServiceException (com.zimbra.common.service.ServiceException)1 ZMimeMessage (com.zimbra.common.zmime.ZMimeMessage)1 ZMimeMultipart (com.zimbra.common.zmime.ZMimeMultipart)1 FieldTokenStream (com.zimbra.cs.index.analysis.FieldTokenStream)1 MimeTypeTokenStream (com.zimbra.cs.index.analysis.MimeTypeTokenStream)1 RFC822AddressTokenStream (com.zimbra.cs.index.analysis.RFC822AddressTokenStream)1 Attachment (com.zimbra.cs.mailbox.Contact.Attachment)1 MailServiceException (com.zimbra.cs.mailbox.MailServiceException)1 BlobInputStream (com.zimbra.cs.store.BlobInputStream)1 FileInputStream (java.io.FileInputStream)1 InputStream (java.io.InputStream)1 ArrayList (java.util.ArrayList)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 Header (javax.mail.Header)1 MessagingException (javax.mail.MessagingException)1