use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class IndexUtils method computeDetailedFacetsForAttachmentBrowsing.
/**
 * Builds the facet lists for the attachment browsing screen.
 *
 * <p>Background: the search result set is made of messages, not attachments. A message that was
 * selected because it carries, say, a pdf may also carry a jpg, and that jpg would then show up
 * in the attachment view even though the user only asked for pdf files. To keep the view limited
 * to what the user requested, the requested file types are read from {@code request} and passed
 * to the attachment-type and attachment-size partitioning so that attachments of other types are
 * discounted. The person, direction and folder facets are attributes of the message rather than
 * of the attachment, so they are computed without that filter.
 *
 * @param request request parameters from the front end; the attachment extensions of interest
 *                are extracted from it
 * @param docs    result set of messages whose attachments are being browsed
 * @param archive archive providing the address book and passed on to the attachment partitioning
 * @return facet name mapped to its facet items, in insertion order; every value is a list
 *         sorted with {@link Collections#sort(List)} (intended to put the heaviest facets first)
 */
public static Map<String, Collection<DetailedFacetItem>> computeDetailedFacetsForAttachmentBrowsing(Multimap<String, String> request, Collection<Document> docs, Archive archive) {
    final AddressBook contacts = archive.addressBook;
    final Set<String> wantedExtensions = getAttachmentExtensionsOfInterest(request);
    final Map<String, Collection<DetailedFacetItem>> facets = new LinkedHashMap<>();

    // attachment type facet is not offered in public mode
    if (!ModeConfig.isPublicMode()) {
        Map<String, DetailedFacetItem> byType = partitionAttachmentsByAttachmentType(archive, docs, wantedExtensions);
        facets.put("attachment type", byType.values());
    }

    Map<String, DetailedFacetItem> bySize = partitionAttachmentsBySize(archive, docs, wantedExtensions);
    facets.put("attachment size", bySize.values());

    if (contacts != null) {
        // people
        Map<Contact, DetailedFacetItem> byPerson = partitionAttachmentsByPerson(docs, contacts);
        facets.put("correspondent", byPerson.values());

        // direction (sender): holds at most one entry, present when at least one message
        // was sent from the owner
        Map<String, DetailedFacetItem> byDirection = partitionAttachmentsByDirection(docs, contacts);
        if (!byDirection.isEmpty()) {
            facets.put("sender", byDirection.values());
        }
    }

    // folder facet is not offered in public mode
    if (!ModeConfig.isPublicMode()) {
        Map<String, DetailedFacetItem> byFolder = partitionAttachmentsByFolder(docs);
        if (!byFolder.isEmpty()) {
            facets.put("folders", byFolder.values());
        }
    }

    // replace every facet collection by a sorted list copy so the heaviest facets come first
    for (Map.Entry<String, Collection<DetailedFacetItem>> facet : facets.entrySet()) {
        List<DetailedFacetItem> sorted = new ArrayList<>(facet.getValue());
        Collections.sort(sorted);
        facet.setValue(sorted);
    }
    return facets;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class EmailDocument method setOwners.
/**
 * Makes the contact(s) behind the given email addresses the owner of the archive.
 *
 * <p>If the addresses resolve to exactly one contact, that contact is set as the owner and the
 * L1 summary is recomputed here. If they resolve to several contacts, these are merged into a
 * new contact, which is added to the address book, replaces the original contacts and is set as
 * the owner. In that case the L1 summary is NOT recomputed here (to avoid doing it twice): the
 * caller must invoke {@code fillL1_SummaryObject} after this method completes.
 *
 * <p>Addresses that do not resolve to a contact are skipped. If no address resolves to a
 * contact (including an empty {@code emailAddresses}), the current owner is left unchanged.
 *
 * @param archive        archive whose address book is updated
 * @param emailAddresses email addresses identifying the owner's contact(s)
 */
public static void setOwners(Archive archive, Set<String> emailAddresses) {
    AddressBook ab = archive.getAddressBook();
    // Get contacts for these email addresses.
    // NOTE(review): lookupByEmail is assumed to return null for an unknown address - confirm.
    // Such results are dropped so that null is never merged or installed as the owner.
    Set<Contact> ofInterest = emailAddresses.stream()
            .map(ab::lookupByEmail)
            .filter(contact -> contact != null)
            .collect(Collectors.toSet());

    if (ofInterest.isEmpty()) {
        // nothing to set as owner; do not replace the existing owner with an empty contact
        return;
    }

    if (ofInterest.size() == 1) {
        // single contact: simply set it as owner and recompute the summary
        Contact newOwner = ofInterest.iterator().next();
        ab.setContactForSelf(newOwner);
        ab.fillL1_SummaryObject(archive.getAllDocs());
        return;
    }

    // several contacts: combine them into one new contact
    // (marking the merged contact as a mailing list if any source was one is NOT done)
    Contact merged = new Contact();
    for (Contact contact : ofInterest) {
        merged.merge(contact);
    }
    // add the merged contact to contactListForIds
    ab.contactListForIds.add(merged);
    // remove the contacts it was built from
    ab.removeContacts(ofInterest);
    // set the newly created contact as owner
    ab.setContactForSelf(merged);
    // L1 summary is intentionally not recomputed here; the caller does it after this method.
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class EmailDocument method recomputeAddressBook.
/**
 * Recomputes the address book based on a set of trusted email addresses. Initially (when
 * importing an archive) only the owner's email id (provided at the time of import) is taken as
 * the trusted email address. See {@link #buildAddressBook(Collection, Collection, Collection)}.
 *
 * <p>The own addresses of the archive's current address book are always treated as trusted in
 * addition to {@code trustedAddress}. The addresses grouped under the owner's contact are not
 * added. The passed-in set is not modified.
 *
 * @param archive        archive supplying the current address book (own addresses and names)
 *                       and the documents to recompute from
 * @param trustedAddress a set of trusted email addresses
 * @return a new address book; the archive's existing address book is not replaced here
 */
public static AddressBook recomputeAddressBook(Archive archive, Set<String> trustedAddress) {
    AddressBook oldAddressBook = archive.getAddressBook();
    Set<String> ownAddresses = oldAddressBook.getOwnAddrs();
    Set<String> ownNames = oldAddressBook.getOwnNamesSet();
    AddressBook newAddressBook = new AddressBook(ownAddresses.toArray(new String[0]), ownNames.toArray(new String[0]));

    EmailFetcherThread.log.debug("Recomputing addressbook with trusted email addresses as- " + trustedAddress.size());
    Collection<EmailDocument> edocs = archive.getAllDocsAsSet().stream().map(doc -> ((EmailDocument) doc)).collect(Collectors.toSet());

    // Add the original owner's emails as trusted addresses too. Work on a private copy so the
    // caller's set is neither mutated nor required to be modifiable.
    Set<String> allTrusted = new java.util.LinkedHashSet<>(trustedAddress);
    allTrusted.addAll(ownAddresses);

    fillAddressBookFromTrustedAddresses(edocs, allTrusted, newAddressBook);
    return newAddressBook;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class Archive method export.
/**
 * Exports this archive into a fresh archive created under {@code out_dir}; {@code name} is the
 * name of the session saved under it. Lexicons (non-public exports only), images, the features
 * cache and the authority assigner file are copied if present, the index is copied through
 * document filters, and (non-public exports only) the retained attachments are copied into a
 * new blob store.
 *
 * <p>Destructive, but intended to be so only in memory: original files on disk should be
 * unmodified. While exporting, this archive's document list, label manager and base directory
 * are temporarily replaced; they are restored before returning, also when the export fails.
 *
 * <p>For {@code EXPORT_PROCESSING_TO_DISCOVERY} (public mode) message descriptions are replaced
 * with names, index bodies and titles are redacted to names only, all attachment index entries
 * are dropped and email domains are masked.
 *
 * @param retainedDocs documents to retain; for non-public exports they become the exported
 *                     document list, and their ids always select the labels to export
 * @param export_mode  kind of export being performed
 * @param out_dir      target directory; an existing directory is deleted and recreated
 * @param name         session name to save the archive under
 * @return {@code out_dir}, or {@code null} if {@code out_dir} is null/empty or the directory
 *         could not be created
 * @throws Exception if copying files, the index or blobs, or saving the archive fails
 */
public synchronized String export(Collection<? extends Document> retainedDocs, Export_Mode export_mode, String out_dir, String name) throws Exception {
    if (Util.nullOrEmpty(out_dir))
        return null;
    File dir = new File(out_dir);
    if (dir.exists() && dir.isDirectory()) {
        log.warn("Overwriting existing directory '" + out_dir + "' (it may already exist)");
        FileUtils.deleteDirectory(dir);
    } else if (!dir.mkdirs()) {
        log.warn("Unable to create directory: " + out_dir);
        return null;
    }
    final boolean exportInPublicMode = export_mode == Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY;
    Archive.prepareBaseDir(out_dir);
    if (!exportInPublicMode && new File(baseDir + File.separator + LEXICONS_SUBDIR).exists())
        FileUtils.copyDirectory(new File(baseDir + File.separator + LEXICONS_SUBDIR), new File(out_dir + File.separator + LEXICONS_SUBDIR));
    if (new File(baseDir + File.separator + IMAGES_SUBDIR).exists())
        FileUtils.copyDirectory(new File(baseDir + File.separator + IMAGES_SUBDIR), new File(out_dir + File.separator + IMAGES_SUBDIR));
    // internal disambiguation cache
    if (new File(baseDir + File.separator + FEATURES_SUBDIR).exists())
        FileUtils.copyDirectory(new File(baseDir + File.separator + FEATURES_SUBDIR), new File(out_dir + File.separator + FEATURES_SUBDIR));
    if (new File(baseDir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME).exists())
        FileUtils.copyFile(new File(baseDir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME), new File(out_dir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME));

    // save the states that get modified temporarily, so they can be restored in the finally block
    final List<Document> savedAllDocs = allDocs;
    final LabelManager oldLabelManager = getLabelManager();
    final String oldBaseDir = baseDir;
    boolean baseDirChanged = false;
    try {
        // change state of the current archive - temporarily
        if (exportInPublicMode) {
            // replace description with names
            replaceDescriptionWithNames(allDocs, this);
        } else {
            allDocs = new ArrayList<>(retainedDocs);
        }

        // labels are exported for the retained documents only
        Set<String> labelDocIds = retainedDocs.stream().map(Document::getUniqueId).collect(Collectors.toSet());
        LabelManager newLabelManager = getLabelManager().getLabelManagerForExport(labelDocIds, export_mode);
        setLabelManager(newLabelManager);

        // copy index and, if for public mode, also redact body and title fields
        final boolean redact_body_instead_of_remove = true;
        // ids of the documents in the (possibly replaced) document list; only these are kept in the index
        final Set<String> indexedDocIds = new LinkedHashSet<>();
        for (Document d : allDocs) {
            indexedDocIds.add(d.getUniqueId());
        }

        Indexer.FilterFunctor emailFilter = doc -> {
            if (!indexedDocIds.contains(doc.get("docId")))
                return false;
            if (exportInPublicMode) {
                String text = redact_body_instead_of_remove ? doc.get("body") : null;
                doc.removeFields("body");
                doc.removeFields("body_original");
                if (text != null) {
                    String redacted_text = IndexUtils.retainOnlyNames(text, doc);
                    // this uses standard analyzer, not stemming, because redacted bodies only have names
                    doc.add(new Field("body", redacted_text, Indexer.full_ft));
                }
                String title = doc.get("title");
                doc.removeFields("title");
                if (title != null) {
                    // redact the title itself (previously the body text was passed here by mistake)
                    String redacted_title = IndexUtils.retainOnlyNames(title, doc);
                    doc.add(new Field("title", redacted_title, Indexer.full_ft));
                }
            }
            return true;
        };

        Indexer.FilterFunctor attachmentFilter = doc -> {
            // no attachment index entries are exported in public mode
            if (exportInPublicMode) {
                return false;
            }
            String docId = doc.get("emailDocId");
            if (docId == null) {
                int di = Integer.parseInt(doc.get("docId"));
                // don't want to print too many messages
                if (di < 10)
                    log.error("Looks like this is an old archive, filtering all the attachments!!\n" + "Consider re-indexing with the latest version for a proper export.");
                return false;
            }
            return indexedDocIds.contains(docId);
        };

        indexer.copyDirectoryWithDocFilter(out_dir, emailFilter, attachmentFilter);
        log.info("Completed exporting indexes");

        // save the blobs in a new blobstore
        if (!exportInPublicMode) {
            log.info("Starting to export blobs, old blob store is: " + blobStore);
            Set<Blob> blobsToKeep = new LinkedHashSet<>();
            for (Document d : allDocs) {
                if (d instanceof EmailDocument && !Util.nullOrEmpty(((EmailDocument) d).attachments)) {
                    blobsToKeep.addAll(((EmailDocument) d).attachments);
                }
            }
            String blobsDir = out_dir + File.separatorChar + BLOBS_SUBDIR;
            new File(blobsDir).mkdirs();
            BlobStore newBlobStore = blobStore.createCopy(blobsDir, blobsToKeep);
            log.info("Completed exporting blobs, newBlobStore in dir: " + blobsDir + " is: " + newBlobStore);
            // switch to the new blob store (important -- the urls and indexes in the new blob store
            // are different from the old one!)
            // NOTE(review): the old blob store is not restored after the export - confirm this is intended.
            blobStore = newBlobStore;
        }

        // change base directory
        baseDirChanged = true;
        setBaseDir(out_dir);

        // Email domains are masked only here, after the base directory has changed, because
        // the address book is saved after maskEmailDomain.
        if (exportInPublicMode) {
            List<Document> docs = this.getAllDocs();
            List<EmailDocument> eds = new ArrayList<>();
            for (Document doc : docs) {
                eds.add((EmailDocument) doc);
            }
            EmailUtils.maskEmailDomain(eds, this.addressBook);
        }

        // write out the archive file (saves the .session file)
        SimpleSessions.saveArchive(out_dir, name, this);
        log.info("Completed saving archive object");
    } finally {
        // restore states, also when the export failed midway
        if (baseDirChanged) {
            setBaseDir(oldBaseDir);
        }
        allDocs = savedAllDocs;
        setLabelManager(oldLabelManager);
    }
    return out_dir;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class IndexUtils method computeDetailedFacets.
/**
 * Computes the detailed facets for a collection of documents. This is the version that stores
 * actual dates instead of just counts for each facet.
 *
 * <p>Correspondent, direction, label, accession, annotation and attachment-type facets are only
 * computed when the archive has an address book. The direction facet is added only when it has
 * more than one entry, the accession facet only outside appraisal mode, and the attachment-type
 * and folder facets only outside public mode.
 *
 * @param docs    documents to compute the facets for
 * @param archive archive providing the address book and passed on to the label, accession and
 *                annotation partitioning
 * @return facet name mapped to its facet items, in insertion order; every value is a list
 *         sorted with {@link Collections#sort(List)} (intended to put the heaviest facets first)
 */
public static Map<String, Collection<DetailedFacetItem>> computeDetailedFacets(Collection<Document> docs, Archive archive) {
    final AddressBook contacts = archive.addressBook;
    final Map<String, Collection<DetailedFacetItem>> facets = new LinkedHashMap<>();

    if (contacts != null) {
        // people
        Map<Contact, DetailedFacetItem> byPerson = partitionDocsByPerson(docs, contacts);
        facets.put("correspondent", byPerson.values());

        // direction: no point showing it unless there are at least two values
        Map<String, DetailedFacetItem> byDirection = partitionDocsByDirection(docs, contacts);
        if (byDirection.size() >= 2) {
            facets.put("direction", byDirection.values());
        }

        // The former transfer / restrictions / reviewed flag facets are no longer computed:
        // restriction, reviewed etc. are handled by labels.

        // facet for restriction labels
        Map<String, DetailedFacetItem> byRestrictionLabel = partitionDocsByLabelTypes(docs, archive, LabelManager.LabType.RESTRICTION);
        facets.put("Restriction Labels", byRestrictionLabel.values());

        // facet for general labels
        Map<String, DetailedFacetItem> byGeneralLabel = partitionDocsByLabelTypes(docs, archive, LabelManager.LabType.GENERAL);
        facets.put("General Labels", byGeneralLabel.values());

        // facet for accession IDs - only in modes other than appraisal
        if (!ModeConfig.isAppraisalMode()) {
            Map<String, DetailedFacetItem> byAccession = partitionDocsByAccessionID(docs, archive);
            facets.put("Accessions", byAccession.values());
        }

        Map<String, DetailedFacetItem> byAnnotationPresence = partitionDocsByAnnotationPresence(docs, archive);
        facets.put("Annotations", byAnnotationPresence.values());

        // attachments - not offered in public mode
        if (!ModeConfig.isPublicMode()) {
            Map<String, DetailedFacetItem> byAttachmentType = partitionDocsByAttachmentType(docs);
            facets.put("attachment type", byAttachmentType.values());
        }
    }

    // folders - not offered in public mode
    if (!ModeConfig.isPublicMode()) {
        Map<String, DetailedFacetItem> byFolder = partitionDocsByFolder(docs);
        if (!byFolder.isEmpty()) {
            facets.put("folders", byFolder.values());
        }
    }

    // replace every facet collection by a sorted list copy so the heaviest facets come first
    for (Map.Entry<String, Collection<DetailedFacetItem>> facet : facets.entrySet()) {
        List<DetailedFacetItem> sorted = new ArrayList<>(facet.getValue());
        Collections.sort(sorted);
        facet.setValue(sorted);
    }
    return facets;
}
Aggregations