Search in sources :

Example 16 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class Archive method redactAttachmentDetailsFromDocs.

/**
 * Redacts the attachment information present in the email documents of {@code allDocs}.
 *
 * <p>Every attachment of every {@link EmailDocument} is replaced by a placeholder
 * {@link EmailAttachmentBlob} that is built from a shortened file name only (the remaining
 * constructor arguments are {@code 0} and {@code null}). The shortened name is the original file
 * name passed through {@code Util.ellipsize(name, 5)}, followed by {@code "."} and the original
 * extension when the file name has one.
 *
 * @param allDocs documents to redact; entries that are not {@link EmailDocument}s are skipped,
 *                matching how the other document-partitioning helpers treat them
 * @param archive archive whose blob store is asked for the original attachment file names
 * @throws Exception declared for callers; propagated from the blob store / utility calls
 */
private static void redactAttachmentDetailsFromDocs(Collection<? extends Document> allDocs, Archive archive) throws Exception {
    for (Document d : allDocs) {
        if (!(d instanceof EmailDocument))
            continue;
        EmailDocument ed = (EmailDocument) d;
        List<Blob> redactedList = new LinkedList<>();
        for (Blob b : ed.attachments) {
            String fname = archive.getBlobStore().full_filename_original(b, false);
            String ext = Util.getExtension(fname);
            // Ellipsize the name, then re-attach the extension so the file type stays visible.
            fname = Util.ellipsize(fname, 5);
            // getExtension may yield null when there is no extension; appending it blindly
            // would produce names ending in ".null".
            if (ext != null && !ext.isEmpty())
                fname = fname + "." + ext;
            // Placeholder blob standing in for the real attachment.
            redactedList.add(new EmailAttachmentBlob(fname, 0, null));
        }
        // Documents without attachments simply end up with an empty list.
        ed.attachments = redactedList;
    }
}
Also used : Blob(edu.stanford.muse.datacache.Blob)

Example 17 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class IndexUtils method partitionAttachmentsBySize.

/**
 * Partitions the attachments of the given documents into size buckets.
 *
 * <p>Attachments whose (lower-cased) extension matches {@code EmailRenderer.EXCLUDED_EXT}
 * (malformed extensions), or is not contained in {@code attachmentExtensionsOfInterest}, are
 * ignored. The remaining attachments are counted in the following buckets, keyed by label:
 * <ul>
 *   <li>{@code <5KB} - attachments smaller than 5 KB (option value "1")</li>
 *   <li>{@code 5KB - 20KB} - attachments from 5 to 20 KB (option value "2")</li>
 *   <li>{@code 20KB - 100KB} - attachments from 20 to 100 KB (option value "3")</li>
 *   <li>{@code 100KB - 2MB} - attachments from 100 KB to 2 MB (option value "4")</li>
 *   <li>{@code >2MB} - attachments larger than 2 MB (option value "5")</li>
 * </ul>
 * The actual range test is delegated to {@code Util.filesizeCheck(optionValue, size)}.
 *
 * <p>Because a facet item counts documents, one dummy {@link EmailDocument} is added per
 * attachment; a running counter is appended to the message id so the dummies differ.
 *
 * @param archive                        archive whose blob store provides the normalized attachment URL
 * @param docs                           documents to inspect; non-{@link EmailDocument}s are skipped
 * @param attachmentExtensionsOfInterest extensions to keep, or {@code null} to keep all
 * @return map from bucket label to its facet item, in first-seen order; empty if the
 *         exclusion pattern cannot be compiled
 */
private static Map<String, DetailedFacetItem> partitionAttachmentsBySize(Archive archive, Collection<? extends Document> docs, Set<String> attachmentExtensionsOfInterest) {
    Map<String, DetailedFacetItem> result = new LinkedHashMap<>();
    Pattern pattern;
    try {
        pattern = Pattern.compile(EmailRenderer.EXCLUDED_EXT);
    } catch (Exception e) {
        Util.report_exception(e);
        return result;
    }
    // this index is used to create dummy email doc. Here for each attachment we should create one document
    int indexToDifferentiate = 0;
    for (Document d : docs) {
        if (!(d instanceof EmailDocument))
            continue;
        EmailDocument ed = (EmailDocument) d;
        // For each attachment in this mail add a dummy document.
        for (Blob attachment : ed.attachments) {
            String ext = Util.getExtension(archive.getBlobStore().get_URL_Normalized(attachment));
            if (ext == null)
                ext = "Unidentified";
            ext = ext.toLowerCase();
            if (pattern.matcher(ext).find()) {
                // don't consider any attachment that has extension of the form [0-9]+
                continue;
            }
            if (attachmentExtensionsOfInterest != null && !attachmentExtensionsOfInterest.contains(ext))
                continue;
            // Pick the first size bucket that accepts this attachment. A single else-if chain
            // guarantees exactly one bucket is chosen per attachment.
            long size = attachment.size;
            final String facetstr;
            final String facetstrval;
            if (Util.filesizeCheck("1", size)) {
                facetstr = "<5KB";
                facetstrval = "1";
            } else if (Util.filesizeCheck("2", size)) {
                facetstr = "5KB - 20KB";
                facetstrval = "2";
            } else if (Util.filesizeCheck("3", size)) {
                facetstr = "20KB - 100KB";
                facetstrval = "3";
            } else if (Util.filesizeCheck("4", size)) {
                facetstr = "100KB - 2MB";
                facetstrval = "4";
            } else if (Util.filesizeCheck("5", size)) {
                facetstr = ">2MB";
                facetstrval = "5";
            } else {
                // No range accepted the size; as before, such attachments are grouped under
                // an empty label.
                facetstr = "";
                facetstrval = "";
            }
            DetailedFacetItem facet = result.computeIfAbsent(facetstr, label -> new DetailedFacetItem(label, "Number of attachments in this range of size", "attachmentFilesize", facetstrval));
            // create a dummy doc such that no two docs are same.
            EmailDocument edummy = new EmailDocument(ed.id, ed.emailSource, ed.folderName, ed.to, ed.cc, ed.bcc, ed.from, ed.getSubjectWithoutTitle(), ed.messageID + indexToDifferentiate, ed.date);
            facet.addDoc(edummy);
            indexToDifferentiate++;
        }
    }
    return result;
}
Also used : Pattern(java.util.regex.Pattern) Blob(edu.stanford.muse.datacache.Blob)

Example 18 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class IndexUtils method partitionAttachmentsByPerson.

/**
 * Partitions attachments by correspondent.
 *
 * <p>Semantics: Person1 -> number of attachments in mails where Person1 was a correspondent,
 * Person2 -> number of attachments in mails where Person2 was a correspondent, and so on.
 * A facet item is registered for every participating contact (other than the owner) of every
 * email document, and one dummy {@link EmailDocument} is added to it per attachment of that
 * mail. A running counter is appended to the message id so the dummies differ.
 *
 * <p>Display name and tooltip are computed once per contact and cached; in public mode both
 * are passed through {@code Util.maskEmailDomain}.
 *
 * @param docs documents to inspect; non-{@link EmailDocument}s are skipped
 * @param ab   address book used to find the participating contacts and their ids
 * @return map from contact to its facet item, in first-seen order
 */
private static Map<Contact, DetailedFacetItem> partitionAttachmentsByPerson(Collection<? extends Document> docs, AddressBook ab) {
    Map<Contact, DetailedFacetItem> facetsByContact = new LinkedHashMap<>();
    // contact -> (display name, tooltip)
    Map<Contact, Pair<String, String>> labelCache = new LinkedHashMap<>();
    // Appended to the message id of each dummy document, one dummy per attachment.
    int dummySeq = 0;
    for (Document doc : docs) {
        if (!(doc instanceof EmailDocument))
            continue;
        EmailDocument email = (EmailDocument) doc;
        for (Contact contact : email.getParticipatingContactsExceptOwn(ab)) {
            Pair<String, String> labels = labelCache.computeIfAbsent(contact, who -> {
                String name = who.pickBestName();
                String tip = who.toTooltip();
                if (ModeConfig.isPublicMode()) {
                    name = Util.maskEmailDomain(name);
                    tip = Util.maskEmailDomain(tip);
                }
                return new Pair<>(name, tip);
            });
            String displayName = labels.getFirst();
            String tooltip = labels.getSecond();
            DetailedFacetItem facet = facetsByContact.computeIfAbsent(contact, who -> new DetailedFacetItem(displayName, "Number of attachments sent or received from " + tooltip, "contact", Integer.toString(ab.getContactId(who))));
            // One dummy document per attachment of this mail, so the facet counts attachments.
            for (Blob unused : email.attachments) {
                facet.addDoc(new EmailDocument(email.id, email.emailSource, email.folderName, email.to, email.cc, email.bcc, email.from, email.getSubjectWithoutTitle(), email.messageID + dummySeq, email.date));
                dummySeq++;
            }
        }
    }
    return facetsByContact;
}
Also used : Blob(edu.stanford.muse.datacache.Blob) Contact(edu.stanford.muse.AddressBookManager.Contact)

Example 19 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class IndexUtils method partitionAttachmentsByDirection.

/**
 * Partitions attachments by message direction.
 *
 * <p>Only the "Owner" facet is produced here: for every email document whose
 * {@code sentOrReceived(ab)} value has {@code EmailDocument.SENT_MASK} set (i.e. the message
 * was sent by the owner), one dummy {@link EmailDocument} is added to the facet per
 * attachment, so the facet's document count reflects the number of attachments. A running
 * counter is appended to the message id so the dummies differ.
 *
 * @param docs documents to inspect; non-{@link EmailDocument}s are skipped
 * @param ab   address book used to decide whether a message was sent by the owner
 * @return map containing the "Owner" facet if it has at least one document, otherwise empty
 */
private static Map<String, DetailedFacetItem> partitionAttachmentsByDirection(Collection<? extends Document> docs, AddressBook ab) {
    DetailedFacetItem ownerFacet = new DetailedFacetItem("Owner", "Incoming messages from the owner", "sender", "owner");
    // Appended to the message id of each dummy document, one dummy per attachment.
    int dummySeq = 0;
    for (Document doc : docs) {
        if (!(doc instanceof EmailDocument))
            continue;
        EmailDocument email = (EmailDocument) doc;
        boolean sentByOwner = (email.sentOrReceived(ab) & EmailDocument.SENT_MASK) != 0;
        if (!sentByOwner)
            continue;
        // One dummy document per attachment of this sent mail.
        for (Blob unused : email.attachments) {
            ownerFacet.addDoc(new EmailDocument(email.id, email.emailSource, email.folderName, email.to, email.cc, email.bcc, email.from, email.getSubjectWithoutTitle(), email.messageID + dummySeq, email.date));
            dummySeq++;
        }
    }
    Map<String, DetailedFacetItem> facets = new LinkedHashMap<>();
    if (ownerFacet.totalCount() > 0)
        facets.put("Owner", ownerFacet);
    return facets;
}
Also used : Blob(edu.stanford.muse.datacache.Blob)

Example 20 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class IndexUtils method partitionAttachmentsByFolder.

/**
 * Partitions attachments by the folder of the mail they belong to.
 *
 * <p>Semantics: Folder1 -> number of attachments in the mails present in Folder1,
 * Folder2 -> number of attachments in the mails present in Folder2, and so on. A facet item is
 * registered for each folder name encountered, and one dummy {@link EmailDocument} is added to
 * it per attachment. A running counter is appended to the message id so the dummies differ.
 *
 * @param docs documents to inspect; non-{@link EmailDocument}s and mails without a folder
 *             name are skipped
 * @return map from full folder name to its facet item, in first-seen order
 */
private static Map<String, DetailedFacetItem> partitionAttachmentsByFolder(Collection<? extends Document> docs) {
    Map<String, DetailedFacetItem> facetsByFolder = new LinkedHashMap<>();
    // Appended to the message id of each dummy document, one dummy per attachment.
    int dummySeq = 0;
    for (Document doc : docs) {
        if (!(doc instanceof EmailDocument) || ((EmailDocument) doc).folderName == null)
            continue;
        EmailDocument email = (EmailDocument) doc;
        String folder = email.folderName;
        DetailedFacetItem facet = facetsByFolder.get(folder);
        if (facet == null) {
            // Facet is labelled with the tail of the folder path; the full name is the key.
            facet = new DetailedFacetItem(Util.filePathTail(folder), folder, "folder", folder);
            facetsByFolder.put(folder, facet);
        }
        // One dummy document per attachment of this mail.
        for (Blob unused : email.attachments) {
            facet.addDoc(new EmailDocument(email.id, email.emailSource, email.folderName, email.to, email.cc, email.bcc, email.from, email.getSubjectWithoutTitle(), email.messageID + dummySeq, email.date));
            dummySeq++;
        }
    }
    return facetsByFolder;
}
Also used : Blob(edu.stanford.muse.datacache.Blob)

Aggregations

Blob (edu.stanford.muse.datacache.Blob)29 Pair (edu.stanford.muse.util.Pair)7 Pattern (java.util.regex.Pattern)5 BlobStore (edu.stanford.muse.datacache.BlobStore)4 Field (org.apache.lucene.document.Field)4 JSONException (org.json.JSONException)3 LinkedHashMultimap (com.google.common.collect.LinkedHashMultimap)2 Multimap (com.google.common.collect.Multimap)2 Gson (com.google.gson.Gson)2 AddressBook (edu.stanford.muse.AddressBookManager.AddressBook)2 Contact (edu.stanford.muse.AddressBookManager.Contact)2 CorrespondentAuthorityMapper (edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper)2 AnnotationManager (edu.stanford.muse.AnnotationManager.AnnotationManager)2 Config (edu.stanford.muse.Config)2 Label (edu.stanford.muse.LabelManager.Label)2 LabelManager (edu.stanford.muse.LabelManager.LabelManager)2 edu.stanford.muse.email (edu.stanford.muse.email)2 NameInfo (edu.stanford.muse.ie.NameInfo)2 Document (edu.stanford.muse.index.Document)2 EmailDocument (edu.stanford.muse.index.EmailDocument)2