use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class SimpleSessions method loadSessionAsMap.
/**
* Loads a session from the given filename and returns the map of loaded
* attributes.
* If readOnly is false, the caller MUST make sure to call packIndex.
* baseDir is the Indexer's baseDir (the path before "indexes/").
*
* @throws IOException
* @throws LockObtainFailedException
* @throws CorruptIndexException
*
* Change as of Nov 2017:
* Earlier, the whole archive was serialized and deserialized as one big entity. It is now broken into
* four main parts: the AddressBook, the EntityBook, the CorrespondentAuthorityMapper, and the rest of the object.
* saveArchive writes these four components to separate files, so while reading we need to read
* each of them from its own file.
*/
public static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly) throws IOException {
log.info("Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
ObjectInputStream ois = null;
// keep reading till eof exception
Map<String, Object> result = new LinkedHashMap<>();
try {
ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(filename)));
while (true) {
String key = (String) ois.readObject();
log.info("loading key: " + key);
try {
Object value = ois.readObject();
if (value == null)
break;
result.put(key, value);
} catch (InvalidClassException ice) {
log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
} catch (ClassNotFoundException cnfe) {
log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
}
}
} catch (EOFException eof) {
log.info("end of session file reached");
} catch (Exception e) {
log.warn("Warning unable to load session: " + Util.stackTrace(e));
result.clear();
}
if (ois != null)
try {
ois.close();
} catch (Exception e) {
Util.print_exception(e, log);
}
// no need to set up sentiments explicitly any more, since the lexicon is part of the session
log.info("Memory status: " + Util.getMemoryStats());
Archive archive = (Archive) result.get("archive");
// no groups in public mode
if (archive != null) {
/*
Read the other three modules of the Archive object, which were marked transient and hence were not serialized with it.
*/
// file path names of addressbook, entitybook and correspondentAuthorityMapper data.
String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
// if the addressbook, entitybook or correspondentAuthorityMapper file is missing, start afresh by importing the email archive again in processing mode.
if (!(new File(addressBookPath).exists()) || !(new File(entityBookPath).exists()) || !(new File(cAuthorityPath).exists())) {
result.put("archive", null);
return result;
}
// ///////////////AddressBook////////////////////////////////////////////
BufferedReader br = new BufferedReader(new FileReader(addressBookPath));
AddressBook ab = AddressBook.readObjectFromStream(br);
archive.addressBook = ab;
br.close();
// //////////////EntityBook/////////////////////////////////////
br = new BufferedReader(new FileReader(entityBookPath));
EntityBook eb = EntityBook.readObjectFromStream(br);
archive.setEntityBook(eb);
br.close();
// /////////////CorrespondentAuthorityMapper/////////////////////////////
CorrespondentAuthorityMapper cmapper = null;
cmapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
archive.correspondentAuthorityMapper = cmapper;
// ///////////////Label Mapper/////////////////////////////////////////////////////
LabelManager labelManager = null;
try {
labelManager = LabelManager.readObjectFromStream(labMapDirPath);
} catch (Exception e) {
Util.print_exception("Exception in reading label manager from archive, assigning a new label manager", e, log);
labelManager = new LabelManager();
}
archive.setLabelManager(labelManager);
// /////////////Annotation Manager///////////////////////////////////////////////////////
AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
archive.setAnnotationManager(annotationManager);
// this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive.
try {
archive.collectionMetadata = readCollectionMetadata(baseDir);
} catch (Exception e) {
Util.print_exception("Error trying to read processing metadata file", e, log);
}
// ///////////////////////////Done reading//////////////////////////////////////////////////////
// most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
archive.postDeserialized(baseDir, readOnly);
result.put("emailDocs", archive.getAllDocs());
}
return result;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class MuseEmailFetcher method fetchAndIndexEmails.
/**
* Key method to fetch the actual email messages; can take a long time.
* Updates the archive's emailDocs and addressBook, and uses its blobstore for attachments.
* @param selectedFolders is in the format <account name>^-^<folder name>
* @param session is used only to set a status provider object; callers who do not need to track status can pass null
* @throws NoDefaultFolderException
*/
public void fetchAndIndexEmails(Archive archive, String[] selectedFolders, boolean useDefaultFolders, FetchConfig fetchConfig, HttpSession session) throws MessagingException, InterruptedException, IOException, JSONException, NoDefaultFolderException, CancelledException {
setupFetchers(-1);
long startTime = System.currentTimeMillis();
if (session != null)
session.setAttribute("statusProvider", new StaticStatusProvider("Starting to process messages..."));
boolean op_cancelled = false, out_of_mem = false;
BlobStore attachmentsStore = archive.getBlobStore();
fetchConfig.downloadAttachments = fetchConfig.downloadAttachments && attachmentsStore != null;
if (Util.nullOrEmpty(fetchers)) {
log.warn("Trying to fetch email with no fetchers, setup not called ?");
return;
}
setupFoldersForFetchers(fetchers, selectedFolders, useDefaultFolders);
List<FolderInfo> fetchedFolderInfos = new ArrayList<>();
// one fetcher will aggregate everything
FetchStats stats = new FetchStats();
MTEmailFetcher aggregatingFetcher = null;
// a fetcher is one source, like an account or a top-level mbox dir. A fetcher could include multiple folders.
long startTimeMillis = System.currentTimeMillis();
for (MTEmailFetcher fetcher : fetchers) {
if (session != null)
session.setAttribute("statusProvider", fetcher);
fetcher.setArchive(archive);
fetcher.setFetchConfig(fetchConfig);
log.info("Memory status before fetching emails: " + Util.getMemoryStats());
// this is the big call, can run for a long time. Note: running in the same thread, it's not fetcher.start();
List<FolderInfo> foldersFetchedByThisFetcher = fetcher.run();
// but don't abort immediately, only at the end, after addressbook has been built for at least the processed messages
if (fetcher.isCancelled()) {
log.info("NOTE: fetcher operation was cancelled");
op_cancelled = true;
break;
}
if (fetcher.mayHaveRunOutOfMemory()) {
log.warn("Fetcher operation ran out of memory " + fetcher);
out_of_mem = true;
break;
}
fetchedFolderInfos.addAll(foldersFetchedByThisFetcher);
if (aggregatingFetcher == null && !Util.nullOrEmpty(foldersFetchedByThisFetcher))
// first non-empty fetcher
aggregatingFetcher = fetcher;
if (aggregatingFetcher != null)
aggregatingFetcher.merge(fetcher);
// add the folders indexed by this fetcher to the stats
EmailStore store = fetcher.getStore();
String fetcherDescription = store.displayName + ":" + store.emailAddress;
for (FolderInfo fi : foldersFetchedByThisFetcher) stats.selectedFolders.add(new Pair<>(fetcherDescription, fi));
}
if (op_cancelled)
throw new CancelledException();
if (out_of_mem)
throw new OutOfMemoryError();
if (aggregatingFetcher != null) {
stats.importStats = aggregatingFetcher.stats;
if (aggregatingFetcher.mayHaveRunOutOfMemory())
throw new OutOfMemoryError();
}
// save memory
aggregatingFetcher = null;
long endTimeMillis = System.currentTimeMillis();
long elapsedMillis = endTimeMillis - startTimeMillis;
log.info(elapsedMillis + " ms for fetch+index, Memory status: " + Util.getMemoryStats());
// note: this is all archive docs, not just the ones that may have been just imported
List<EmailDocument> allEmailDocs = (List) archive.getAllDocs();
archive.addFetchedFolderInfos(fetchedFolderInfos);
if (allEmailDocs.size() == 0)
log.warn("0 messages from email fetcher");
EmailUtils.cleanDates(allEmailDocs);
// create a new address book
if (session != null)
session.setAttribute("statusProvider", new StaticStatusProvider("Building address book..."));
AddressBook addressBook = EmailDocument.buildAddressBook(allEmailDocs, archive.ownerEmailAddrs, archive.ownerNames);
log.info("Address book stats: " + addressBook.getStats());
if (session != null)
session.setAttribute("statusProvider", new StaticStatusProvider("Finishing up..."));
archive.setAddressBook(addressBook);
// we shouldn't really have dups now because the archive ensures that only unique docs are added
// move sorting to archive.postprocess?
EmailUtils.removeDupsAndSort(allEmailDocs);
// report stats
stats.lastUpdate = new Date().getTime();
// (String) JSPHelper.getSessionAttribute(session, "userKey");
stats.userKey = "USER KEY UNUSED";
stats.fetchAndIndexTimeMillis = elapsedMillis;
updateStats(archive, addressBook, stats);
if (session != null)
session.removeAttribute("statusProvider");
log.info("Fetch+index complete: " + Util.commatize(System.currentTimeMillis() - startTime) + " ms");
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class SearchResult method filterForCorrespondents.
/**
* Returns only the docs where a name or email address in the checked field(s) matches correspondentsStr.
* correspondentsStr can be or-delimited to specify multiple strings.
*/
public static SearchResult filterForCorrespondents(SearchResult inputSet, String correspondentsStr, boolean checkToField, boolean checkFromField, boolean checkCcField, boolean checkBccField) {
Set<Contact> searchedContacts = new LinkedHashSet<>();
AddressBook ab = inputSet.archive.addressBook;
Set<String> correspondents = Util.splitFieldForOr(correspondentsStr);
for (String s : correspondents) {
// this lookup will normalize, be case-insensitive, etc.
Collection<Contact> contacts = ab.lookupByEmailOrName(s);
if (contacts != null)
searchedContacts.addAll(contacts);
}
// retain only those documents in matchedDocs that have at least one contact matching ANY of the searchedContacts
inputSet.matchedDocs = inputSet.matchedDocs.entrySet().stream().filter(k -> {
EmailDocument ed = (EmailDocument) k.getKey();
Collection<Contact> contactsOfInterest = new LinkedHashSet<>();
// add from addresses
if (checkFromField && ed.from != null)
contactsOfInterest.addAll(Arrays.stream(ed.from).map(address -> ab.lookupByAddress(address)).collect(Collectors.toList()));
// add to addresses
if (checkToField && ed.to != null)
contactsOfInterest.addAll(Arrays.stream(ed.to).map(address -> ab.lookupByAddress(address)).collect(Collectors.toList()));
// add cc'd addresses
if (checkCcField && ed.cc != null)
contactsOfInterest.addAll(Arrays.stream(ed.cc).map(address -> ab.lookupByAddress(address)).collect(Collectors.toList()));
// add bcc addresses
if (checkBccField && ed.bcc != null)
contactsOfInterest.addAll(Arrays.stream(ed.bcc).map(address -> ab.lookupByAddress(address)).collect(Collectors.toList()));
// Collection<Contact> contactsInMessage = EmailUtils.getContactsForMessage(ab, ed);
return contactsOfInterest.stream().anyMatch(searchedContacts::contains);
}).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
return inputSet;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class EmailRenderer method getHTMLForHeader.
/**
* Returns an HTML table string for the doc header.
*
* @throws IOException
*/
private static StringBuilder getHTMLForHeader(EmailDocument ed, SearchResult searchResult, boolean IA_links, boolean debug) throws IOException {
AddressBook addressBook = searchResult.getArchive().addressBook;
Set<String> contactNames = new LinkedHashSet<>();
Set<String> contactAddresses = new LinkedHashSet<>();
String archiveID = ArchiveReaderWriter.getArchiveIDForArchive(searchResult.getArchive());
// get contact ids from the searchResult object (collect() never returns null, so no null check is needed)
Set<Integer> highlightContactIds = searchResult.getHLInfoContactIDs().stream().map(Integer::parseInt).collect(Collectors.toSet());
for (Integer hci : highlightContactIds) {
if (hci == null)
continue;
Contact c = searchResult.getArchive().addressBook.getContact(hci);
if (c == null)
continue;
contactNames.addAll(c.getNames());
contactAddresses.addAll(c.getEmails());
}
// get highlight terms from searchResult object for this document.
Set<String> highlightTerms = searchResult.getHLInfoTerms(ed);
StringBuilder result = new StringBuilder();
// header table
result.append("<table class=\"docheader rounded\">\n");
// + this.folderName + "</td></tr>\n");
if (debug)
result.append("<tr><td>docId: </td><td>" + ed.getUniqueId() + "</td></tr>\n");
result.append(JSPHelper.getHTMLForDate(archiveID, ed.date));
final String style = "<tr><td align=\"right\" class=\"muted\" valign=\"top\">";
// email specific headers
result.append(style + "From: </td><td align=\"left\">");
Address[] addrs = ed.from;
// get ArchiveID
if (addrs != null) {
result.append(formatAddressesAsHTML(archiveID, addrs, addressBook, TEXT_WRAP_WIDTH, highlightTerms, contactNames, contactAddresses));
}
result.append("\n</td></tr>\n");
result.append(style + "To: </td><td align=\"left\">");
addrs = ed.to;
if (addrs != null)
result.append(formatAddressesAsHTML(archiveID, addrs, addressBook, TEXT_WRAP_WIDTH, highlightTerms, contactNames, contactAddresses) + "");
result.append("\n</td></tr>\n");
if (ed.cc != null && ed.cc.length > 0) {
result.append(style + "Cc: </td><td align=\"left\">");
result.append(formatAddressesAsHTML(archiveID, ed.cc, addressBook, TEXT_WRAP_WIDTH, highlightTerms, contactNames, contactAddresses) + "");
result.append("\n</td></tr>\n");
}
if (ed.bcc != null && ed.bcc.length > 0) {
result.append(style + "Bcc: </td><td align=\"left\">");
result.append(formatAddressesAsHTML(archiveID, ed.bcc, addressBook, TEXT_WRAP_WIDTH, highlightTerms, contactNames, contactAddresses) + "");
result.append("\n</td></tr>\n");
}
String x = ed.description;
if (x == null)
x = "<None>";
result.append(style + "Subject: </td>");
// <pre> to escape special chars if any in the subject. max 70 chars in
// one line, otherwise spill to next line
result.append("<td align=\"left\"><b>");
x = DatedDocument.formatStringForMaxCharsPerLine(x, 70).toString();
if (x.endsWith("\n"))
x = x.substring(0, x.length() - 1);
Span[] names = searchResult.getArchive().getAllNamesInDoc(ed, false);
// maps entity text -> Entity; the id is set only if the entity is authorised, else null
Map<String, Entity> entitiesWithId = new HashMap<>();
// we annotate three specially recognized types
Map<Short, String> recMap = new HashMap<>();
recMap.put(NEType.Type.PERSON.getCode(), "cp");
recMap.put(NEType.Type.PLACE.getCode(), "cl");
recMap.put(NEType.Type.ORGANISATION.getCode(), "co");
Arrays.stream(names).filter(n -> recMap.containsKey(NEType.getCoarseType(n.type).getCode())).forEach(n -> {
Set<String> types = new HashSet<>();
types.add(recMap.get(NEType.getCoarseType(n.type).getCode()));
entitiesWithId.put(n.text, new Entity(n.text, null, types));
});
x = searchResult.getArchive().annotate(x, ed.getDate(), ed.getUniqueId(), searchResult.getRegexToHighlight(), highlightTerms, entitiesWithId, IA_links, false);
result.append(x);
result.append("</b>\n");
result.append("\n</td></tr>\n");
// String messageId = Util.hash (ed.getSignature());
// String messageLink = "(<a href=\"browse?archiveID="+archiveID+"&adv-search=1&uniqueId=" + messageId + "\">Link</a>)";
// result.append ("\n" + style + "ID: " + "</td><td>" + messageId + " " + messageLink + "</td></tr>");
// end docheader table
result.append("</table>\n");
if (ModeConfig.isPublicMode())
return new StringBuilder(Util.maskEmailDomain(result.toString()));
return result;
}
use of edu.stanford.muse.AddressBookManager.AddressBook in project epadd by ePADD.
the class Archive method export.
/**
* Creates a fresh archive under out_dir; name is the name of the session
* under it. Blobs are exported into this archive dir. Destructive, but
* only in memory: the original files on disk should remain unmodified.
*
* @param retainedDocs the documents to retain in the exported archive
* @throws Exception
*/
public synchronized String export(Collection<? extends Document> retainedDocs, Export_Mode export_mode, String out_dir, String name, Consumer<StatusProvider> setStatusProvider) throws Exception {
if (Util.nullOrEmpty(out_dir))
return null;
File dir = new File(out_dir);
if (dir.exists() && dir.isDirectory()) {
log.warn("Overwriting existing directory '" + out_dir + "' (it may already exist)");
FileUtils.deleteDirectory(dir);
} else if (!dir.mkdirs()) {
log.warn("Unable to create directory: " + out_dir);
return null;
}
String statusmsg = export_mode == Export_Mode.EXPORT_APPRAISAL_TO_PROCESSING ? "Exporting to Processing" : (export_mode == Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY ? "Exporting to Discovery" : "Exporting to Delivery");
boolean exportInPublicMode = export_mode == Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY;
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Preparing base directory.."));
prepareBaseDir(out_dir);
if (!exportInPublicMode && new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + LEXICONS_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + LEXICONS_SUBDIR), new File(out_dir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + LEXICONS_SUBDIR));
// copy normalization file if it exists
if (!exportInPublicMode && new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR + File.separator + Archive.BLOBLNORMALIZATIONFILE_SUFFIX).exists())
FileUtils.copyFile(new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR + File.separator + Archive.BLOBLNORMALIZATIONFILE_SUFFIX), new File(out_dir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR + File.separator + Archive.BLOBLNORMALIZATIONFILE_SUFFIX));
if (new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + IMAGES_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + IMAGES_SUBDIR), new File(out_dir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + IMAGES_SUBDIR));
// internal disambiguation cache
if (new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + FEATURES_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + FEATURES_SUBDIR), new File(out_dir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + FEATURES_SUBDIR));
if (new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME).exists())
FileUtils.copyFile(new File(baseDir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME), new File(out_dir + File.separator + Archive.BAG_DATA_FOLDER + File.separatorChar + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME));
// save the states that may get modified
List<Document> savedAllDocs = allDocs;
LabelManager oldLabelManager = getLabelManager();
// change state of the current archive -temporarily//////////
if (exportInPublicMode) {
// replace description with names;
allDocs = new ArrayList<>(retainedDocs);
replaceDescriptionWithNames(allDocs, this);
// Also replace the attachment information present in EmailDocument Object
redactAttachmentDetailsFromDocs(allDocs, this);
} else {
allDocs = new ArrayList<>(retainedDocs);
}
Set<String> retainedDocIDs = retainedDocs.stream().map(Document::getUniqueId).collect(Collectors.toSet());
LabelManager newLabelManager = getLabelManager().getLabelManagerForExport(retainedDocIDs, export_mode);
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Exporting LabelManager.."));
setLabelManager(newLabelManager);
// copy index and if for public mode, also redact body and remove title
// fields
final boolean redact_body_instead_of_remove = true;
/* Set<String> docIdSet = new LinkedHashSet<>();
for (Document d : allDocs)
docIdSet.add(d.getUniqueId());
final Set<String> retainedDocIds = docIdSet;*/
Indexer.FilterFunctor emailFilter = doc -> {
if (!retainedDocIDs.contains(doc.get("docId")))
return false;
if (exportInPublicMode) {
String text = null;
if (redact_body_instead_of_remove) {
text = doc.get("body");
}
doc.removeFields("body");
doc.removeFields("body_original");
if (text != null) {
String redacted_text = IndexUtils.retainOnlyNames(text, doc);
doc.add(new Field("body", redacted_text, Indexer.full_ft));
// this uses the standard analyzer, not stemming, because redacted bodies only contain names.
}
String title = doc.get("title");
doc.removeFields("title");
if (title != null) {
String redacted_title = IndexUtils.retainOnlyNames(title, doc);
doc.add(new Field("title", redacted_title, Indexer.full_ft));
}
}
}
return true;
};
/*
Moved to the end, after changing the baseDir of the archive, because the addressbook gets saved
after maskEmailDomain:
if (exportInPublicMode) {
List<Document> docs = this.getAllDocs();
List<EmailDocument> eds = new ArrayList<>();
for (Document doc : docs)
eds.add((EmailDocument) doc);
EmailUtils.maskEmailDomain(eds, this.addressBook);
}
*/
Indexer.FilterFunctor attachmentFilter = doc -> {
if (exportInPublicMode) {
return false;
}
String docId = doc.get("emailDocId");
if (docId == null) {
Integer di = Integer.parseInt(doc.get("docId"));
// don't want to print too many messages
if (di < 10)
log.error("Looks like this is an old archive, filtering all the attachments!!\n" + "Consider re-indexing with the latest version for a proper export.");
return false;
}
return retainedDocIDs.contains(docId);
};
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Exporting Index.."));
indexer.copyDirectoryWithDocFilter(out_dir + File.separatorChar + Archive.BAG_DATA_FOLDER, emailFilter, attachmentFilter);
log.info("Completed exporting indexes");
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Exporting Blobs.."));
// save the blobs in a new blobstore
if (!exportInPublicMode) {
log.info("Starting to export blobs, old blob store is: " + blobStore);
Set<Blob> blobsToKeep = new LinkedHashSet<>();
for (Document d : allDocs) if (d instanceof EmailDocument)
if (!Util.nullOrEmpty(((EmailDocument) d).attachments))
blobsToKeep.addAll(((EmailDocument) d).attachments);
String blobsDir = out_dir + File.separatorChar + Archive.BAG_DATA_FOLDER + File.separatorChar + BLOBS_SUBDIR;
new File(blobsDir).mkdirs();
BlobStore newBlobStore = blobStore.createCopy(blobsDir, blobsToKeep);
log.info("Completed exporting blobs, newBlobStore in dir: " + blobsDir + " is: " + newBlobStore);
// switch to the new blob store (important -- the urls and indexes in the new blob store are different from the old one!)
blobStore = newBlobStore;
}
String oldBaseDir = baseDir;
// change base directory
setBaseDir(out_dir);
if (exportInPublicMode) {
List<Document> docs = this.getAllDocs();
List<EmailDocument> eds = new ArrayList<>();
for (Document doc : docs) eds.add((EmailDocument) doc);
EmailUtils.maskEmailDomain(eds, this.addressBook);
}
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Exporting EntityBook Manager.."));
// now read entitybook manager as well (or build from lucene)
String outdir = out_dir + File.separatorChar + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR;
String entityBookPath = outdir + File.separatorChar + Archive.ENTITYBOOKMANAGER_SUFFIX;
EntityBookManager entityBookManager = ArchiveReaderWriter.readEntityBookManager(this, entityBookPath);
this.setEntityBookManager(entityBookManager);
// recompute entity count because some documents have been redacted
double theta = 0.001;
// getEntitiesCountMapModuloThreshold(this,theta);
this.collectionMetadata.entityCounts = this.getEntityBookManager().getEntitiesCountMapModuloThreshold(theta);
// write out the archive file.. note that this is a fresh creation of archive in the exported folder
setStatusProvider.accept(new StaticStatusProvider(statusmsg + ":" + "Export done. Saving Archive.."));
// save .session file.
ArchiveReaderWriter.saveArchive(out_dir, name, this, Save_Archive_Mode.FRESH_CREATION);
log.info("Completed saving archive object");
// restore states
setBaseDir(oldBaseDir);
allDocs = savedAllDocs;
setLabelManager(oldLabelManager);
return out_dir;
}