use of edu.stanford.muse.index.EmailDocument in project epadd by ePADD.
the class AddressBook method getOutMessageCount.
/**
 * Counts how many of the messages in the given collection are outgoing, i.e. have a
 * "from" address that is one of the email addresses of the self contact.
 *
 * @param docs the messages to inspect; a null collection is treated as empty
 * @return the number of messages whose sender address belongs to the self contact,
 *         or 0 when there is no self contact (or it has no email addresses)
 */
public int getOutMessageCount(Collection<EmailDocument> docs) {
    Contact me = getContactForSelf();
    if (me == null || docs == null) {
        return 0;
    }
    // The self contact's address set is the same for every message, so look it up once
    // instead of once per document.
    Set<String> selfAddrs = me.getEmails();
    if (selfAddrs == null) {
        return 0;
    }
    int count = 0;
    for (EmailDocument ed : docs) {
        if (selfAddrs.contains(ed.getFromEmailAddress())) {
            count++;
        }
    }
    return count;
}
use of edu.stanford.muse.index.EmailDocument in project epadd by ePADD.
the class SimpleSessions method saveArchive.
/**
 * Saves the archive of the current session under the sessions sub-directory of
 * {@code baseDir}. Note: no blobs are saved.
 *
 * Steps, in order:
 * 1. verifies the archive and refreshes its collection metadata (timestamp, time zone,
 *    document/blob counts, first/last date, sent/received counts);
 * 2. serializes the archive object into a gzipped session file;
 * 3. saves the address book, entity book, correspondent authority mapper, label manager,
 *    annotations and collection metadata through their dedicated save methods;
 * 4. closes the archive and re-opens it for reading.
 *
 * A failure while writing the serialized archive is logged and does not stop the
 * remaining steps, but it is reported to the caller through the return value.
 *
 * @param baseDir directory under which the sessions sub-directory lives
 * @param name    base name of the session file (the session suffix is appended)
 * @param archive the archive to save; its collectionMetadata is created if missing and updated in place
 * @return true if the serialized archive object was written successfully, false otherwise
 * @throws IOException declared for callers; presumably raised by the helper save/close/open calls
 */
public static boolean saveArchive(String baseDir, String name, Archive archive) throws IOException {
    log.info("Before saving the archive checking if it is still in good shape");
    archive.Verify();

    String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
    // just to be safe
    new File(dir).mkdirs();
    String filename = dir + File.separatorChar + name + SimpleSessions.SESSION_SUFFIX;
    log.info("Saving archive to (session) file " + filename);

    if (archive.collectionMetadata == null) {
        archive.collectionMetadata = new Archive.CollectionMetadata();
    }
    archive.collectionMetadata.timestamp = new Date().getTime();
    archive.collectionMetadata.tz = TimeZone.getDefault().getID();
    archive.collectionMetadata.nDocs = archive.getAllDocs().size();

    int totalAttachments = 0, images = 0, docs = 0, others = 0;
    int sentMessages = 0, receivedMessages = 0, hackyDates = 0;
    Date firstDate = null, lastDate = null;

    for (Document d : archive.getAllDocs()) {
        if (!(d instanceof EmailDocument)) {
            continue;
        }
        EmailDocument ed = (EmailDocument) d;

        if (ed.date != null) {
            if (ed.hackyDate) {
                // hacky dates are only counted; they do not contribute to the first/last date range
                hackyDates++;
            } else {
                if (firstDate == null || ed.date.before(firstDate)) {
                    firstDate = ed.date;
                }
                if (lastDate == null || ed.date.after(lastDate)) {
                    lastDate = ed.date;
                }
            }
        }

        // a message may carry both the sent and the received bit, so the two checks are independent
        int sentOrReceived = ed.sentOrReceived(archive.addressBook);
        if ((sentOrReceived & EmailDocument.SENT_MASK) != 0) {
            sentMessages++;
        }
        if ((sentOrReceived & EmailDocument.RECEIVED_MASK) != 0) {
            receivedMessages++;
        }

        if (!Util.nullOrEmpty(ed.attachments)) {
            totalAttachments += ed.attachments.size();
            for (Blob b : ed.attachments) {
                // attachments without a filename count towards the total but not towards any category
                if (Util.nullOrEmpty(b.filename)) {
                    continue;
                }
                if (Util.is_image_filename(b.filename)) {
                    images++;
                } else if (Util.is_doc_filename(b.filename)) {
                    docs++;
                } else {
                    others++;
                }
            }
        }
    }

    archive.collectionMetadata.firstDate = firstDate;
    archive.collectionMetadata.lastDate = lastDate;
    archive.collectionMetadata.nIncomingMessages = receivedMessages;
    archive.collectionMetadata.nOutgoingMessages = sentMessages;
    archive.collectionMetadata.nHackyDates = hackyDates;
    archive.collectionMetadata.nBlobs = totalAttachments;
    archive.collectionMetadata.nUniqueBlobs = archive.blobStore.uniqueBlobs.size();
    archive.collectionMetadata.nImageBlobs = images;
    archive.collectionMetadata.nDocBlobs = docs;
    archive.collectionMetadata.nOtherBlobs = others;

    // Remember whether the main serialized write worked so the caller is not told "true" after a failure.
    boolean archiveWritten = true;
    try (ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(filename)))) {
        oos.writeObject("archive");
        oos.writeObject(archive);
    } catch (Exception e1) {
        Util.print_exception("Failed to write archive: ", e1, log);
        archiveWritten = false;
    }

    // Now write modular transient fields to separate files-
    // By Dec 2017 there are three transient fields which will be saved and loaded separately
    // 1. AddressBook -- Stored in a gzip file with name in the same directory as of archive.
    // 2. EntityBook
    // 3. CorrespondentAuthorityMapper
    // Before final release of v5 in Feb 2018, modularize annotation out of archive.
    // AddressBook Writing -- In human readable form
    SimpleSessions.saveAddressBook(archive);
    // EntityBook Writing -- In human readable form
    SimpleSessions.saveEntityBook(archive);
    // CAuthorityMapper Writing -- Serialized
    SimpleSessions.saveCorrespondentAuthorityMapper(archive);
    // LabelManager Writing -- Serialized
    SimpleSessions.saveLabelManager(archive);
    // AnnotationManager writing -- In human readable form
    SimpleSessions.saveAnnotations(archive);
    // collection metadata is written through its own writer rather than serialized next to the session file
    writeCollectionMetadata(archive.collectionMetadata, baseDir);

    archive.close();
    // re-open for reading
    archive.openForRead();
    // note: no need of saving archive authorities separately -- they are already saved as part of the archive object
    return archiveWritten;
}
use of edu.stanford.muse.index.EmailDocument in project epadd by ePADD.
the class AddressBook method sortedContacts.
/**
 * Returns all contacts that appear on the given docs, without duplicates.
 *
 * For every address on a message, the matching contact gets an "out" count when the address
 * equals the message's sender address and an "in" count otherwise. The result lists contacts
 * in the order produced by {@code Util.sortMapByValue} on the in counts, followed by any
 * remaining contacts in the order produced by sorting the out counts (presumably highest
 * count first -- confirm against Util.sortMapByValue).
 *
 * Note that when processing sent email, the in count is the number of messages sent by the
 * owner of the archive to that person.
 *
 * @param docs the messages whose participants are collected
 * @return a new list of the contacts found, in the order described above
 */
public List<Contact> sortedContacts(Collection<EmailDocument> docs) {
    Map<Contact, Integer> contactInCount = new LinkedHashMap<>();
    Map<Contact, Integer> contactOutCount = new LinkedHashMap<>();

    // we'll also count the recipient twice if he sends a message to himself
    for (EmailDocument ed : docs) {
        String senderEmail = ed.getFromEmailAddress();
        for (String email : ed.getAllAddrs()) {
            Contact c = lookupByEmail(email);
            if (c == null) {
                continue;
            }
            // null-safe: a message without a sender address must not abort the whole count
            boolean isSender = senderEmail != null && senderEmail.equals(email);
            Map<Contact, Integer> counts = isSender ? contactOutCount : contactInCount;
            counts.merge(c, 1, Integer::sum);
        }
    }

    // A LinkedHashSet keeps the first position of each contact, so contacts ranked by in count
    // stay ahead of those that only show up in the out counts.
    Set<Contact> sortedContactsSet = new LinkedHashSet<>();
    for (Pair<Contact, Integer> p : Util.sortMapByValue(contactInCount)) {
        sortedContactsSet.add(p.getFirst());
    }
    // then by out count.
    for (Pair<Contact, Integer> p : Util.sortMapByValue(contactOutCount)) {
        sortedContactsSet.add(p.getFirst());
    }

    for (Contact c : sortedContactsSet) {
        if (getContactId(c) < 0) {
            Util.warnIf(true, "Contact has -ve contact id: " + c, log);
        }
    }
    return new ArrayList<>(sortedContactsSet);
}
use of edu.stanford.muse.index.EmailDocument in project epadd by ePADD.
the class AddressBook method fillL1_SummaryObject.
/**
 * Rebuilds the three L1 summary maps of this address book from the given documents:
 * - L1_Summary_SentDocs: contact -> messages whose sender address resolves to that contact;
 * - L1_Summary_ReceivedDocs: contact -> messages on which that contact is a to/cc/bcc recipient;
 * - L1_Summary_RecFrmOwnerDocs: contact -> messages flagged as sent (SENT_MASK) on which that
 *   contact is a to/cc/bcc recipient.
 *
 * All three maps are cleared first. Documents that are not EmailDocuments, and messages whose
 * sender address does not resolve to a contact, are skipped.
 *
 * @param alldocs the documents to summarize
 */
public void fillL1_SummaryObject(Collection<Document> alldocs) {
    // clear the summary objects.
    L1_Summary_SentDocs.clear();
    L1_Summary_ReceivedDocs.clear();
    L1_Summary_RecFrmOwnerDocs.clear();

    // compute counts
    for (Document d : alldocs) {
        if (!(d instanceof EmailDocument)) {
            continue;
        }
        EmailDocument ed = (EmailDocument) d;

        Contact senderContact = lookupByEmail(ed.getFromEmailAddress());
        if (senderContact == null) {
            // Do not fall back to the owner's contact here: that would wrongly report the
            // message as sent by the owner.
            continue;
        }

        // L1_Summary_SentDocs stores the email messages where this contact is the sender.
        L1_Summary_SentDocs.computeIfAbsent(senderContact, k -> new LinkedHashSet<>()).add(ed);

        // every to/cc/bcc contact received this mail.
        Collection<Contact> toContacts = ed.getToCCBCCContacts(this);
        for (Contact c : toContacts) {
            L1_Summary_ReceivedDocs.computeIfAbsent(c, k -> new LinkedHashSet<>()).add(ed);
        }

        // message could be both sent and received; only the sent bit matters here
        boolean sentByOwner = (ed.sentOrReceived(this) & EmailDocument.SENT_MASK) != 0;
        if (sentByOwner) {
            // this is a sent email (sent by the owner): each to/cc/bcc contact, possibly
            // including the owner's own contact, received it from the owner.
            for (Contact c : toContacts) {
                L1_Summary_RecFrmOwnerDocs.computeIfAbsent(c, k -> new LinkedHashSet<>()).add(ed);
            }
        }
        // The mention semantics were removed in v7, so no mention map is filled here.
    }
}
use of edu.stanford.muse.index.EmailDocument in project epadd by ePADD.
the class NameExpansion method getMatches.
/**
 * Given the string s in email document ed, returns a Matches object with candidates matching s.
 *
 * Candidates are looked for in progressively wider scopes, with a lower score for each wider scope:
 * 1. names of contacts on this message, including the self contact if there is one (score 1.0);
 * 2. elsewhere in this message (1.0);
 * 3. other messages with the same thread id (0.9);
 * 4. other messages involving any of this message's correspondents (0.8);
 * 5. anywhere in the archive, searching subject and body (0.7).
 *
 * The search returns as soon as {@code Matches.addMatch} or {@code matchAgainstEmailContent}
 * returns true (presumably meaning enough matches have been collected -- confirm against Matches).
 *
 * @param s          the string to expand
 * @param archive    the archive containing ed; thread ids are assigned on it if ed has none yet
 * @param ed         the message in which s occurs
 * @param maxResults passed to the Matches constructor
 * @return the Matches object holding whatever candidates were found
 */
public static Matches getMatches(String s, Archive archive, EmailDocument ed, int maxResults) {
    Matches matches = new Matches(s, maxResults);
    AddressBook ab = archive.addressBook;

    List<Contact> contactsExceptSelf = ed.getParticipatingContactsExceptOwn(archive.addressBook);
    List<Contact> contacts = new ArrayList<>(contactsExceptSelf);
    // the address book may have no self contact; adding null would fail in the loop below
    Contact self = ab.getContactForSelf();
    if (self != null) {
        contacts.add(self);
    }

    // check if s matches any contacts on this message
    for (Contact c : contacts) {
        if (c.getNames() == null) {
            continue;
        }
        for (String name : c.getNames()) {
            StringMatchType matchType = Matches.match(s, name);
            if (matchType == null) {
                continue;
            }
            if (matches.addMatch(name, 1.0F, matchType, "Name of a contact on this message", true)) {
                return matches;
            }
            // only the first matching name of each contact is used
            break;
        }
    }

    // check if s matches anywhere else in this message
    if (matchAgainstEmailContent(archive, ed, matches, "Mentioned elsewhere in this message", 1.0F)) {
        return matches;
    }

    // a thread id of 0 is treated as "not assigned yet"
    synchronized (archive) {
        if (ed.threadID == 0L) {
            archive.assignThreadIds();
        }
    }

    // check if s matches anywhere else in this thread
    @SuppressWarnings("unchecked") // docs of a thread are treated as email documents, as in the original code
    List<EmailDocument> messagesInThread = (List) archive.docsWithThreadId(ed.threadID);
    for (EmailDocument messageInThread : messagesInThread) {
        if (matchAgainstEmailContent(archive, messageInThread, matches, "Mentioned in this thread", 0.9F)) {
            return matches;
        }
    }

    // check if s matches any other email with any of these correspondents
    for (Contact c : contactsExceptSelf) {
        if (c.getEmails() == null) {
            continue;
        }
        String correspondentsSearchStr = String.join(";", c.getEmails());
        // As filterForCorrespondents function do not use queryparams therefore it is fine to instantiate SearchResult
        // object with queryParams as null. After refactoring, filter methods take SearchObject as input and modify it
        // according to the filter.
        SearchResult correspondentInput = new SearchResult(archive, null);
        SearchResult correspondentOutput = SearchResult.filterForCorrespondents(correspondentInput, correspondentsSearchStr, true, true, true, true);
        for (Document messageWithSameCorrespondents : correspondentOutput.getDocumentSet()) {
            EmailDocument edoc = (EmailDocument) messageWithSameCorrespondents;
            if (matchAgainstEmailContent(archive, edoc, matches, "Mentioned in other messages with these correspondents", 0.8F)) {
                return matches;
            }
        }
    }

    // search for s anywhere in the archive
    Multimap<String, String> params = LinkedHashMultimap.create();
    params.put("termSubject", "on");
    params.put("termBody", "on");
    // a multi-word term is wrapped in double quotes unless it is already quoted on both ends
    String term = s;
    boolean alreadyQuoted = s.startsWith("\"") && s.endsWith("\"");
    if (s.contains(" ") && !alreadyQuoted) {
        term = "\"" + s + "\"";
    }

    // To search for terms, create a searchResult object and invoke appropriate filter method on it.
    SearchResult inputSet = new SearchResult(archive, params);
    SearchResult outputSet = SearchResult.searchForTerm(inputSet, term);
    for (Document docWithTerm : outputSet.getDocumentSet()) {
        EmailDocument edoc = (EmailDocument) docWithTerm;
        if (matchAgainstEmailContent(archive, edoc, matches, "Mentioned elsewhere in this archive", 0.7F)) {
            return matches;
        }
    }
    return matches;
}
Aggregations