use of edu.stanford.muse.datacache.BlobStore in project epadd by ePADD.
the class Archive method export.
/**
* A fresh archive is created under out_dir; name is the name of the session
* under it. Blobs are exported into this archive dir. Destructive, but
* should be so only in memory; original files on disk should be unmodified.
*
* @param retainedDocs the docs to retain in the exported archive
* @throws Exception
*/
public synchronized String export(Collection<? extends Document> retainedDocs, Export_Mode export_mode, String out_dir, String name) throws Exception {
if (Util.nullOrEmpty(out_dir))
return null;
File dir = new File(out_dir);
if (dir.exists() && dir.isDirectory()) {
log.warn("Overwriting existing directory '" + out_dir + "' (it may already exist)");
FileUtils.deleteDirectory(dir);
} else if (!dir.mkdirs()) {
log.warn("Unable to create directory: " + out_dir);
return null;
}
boolean exportInPublicMode = export_mode == Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY;
Archive.prepareBaseDir(out_dir);
if (!exportInPublicMode && new File(baseDir + File.separator + LEXICONS_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + LEXICONS_SUBDIR), new File(out_dir + File.separator + LEXICONS_SUBDIR));
if (new File(baseDir + File.separator + IMAGES_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + IMAGES_SUBDIR), new File(out_dir + File.separator + IMAGES_SUBDIR));
// internal disambiguation cache
if (new File(baseDir + File.separator + FEATURES_SUBDIR).exists())
FileUtils.copyDirectory(new File(baseDir + File.separator + FEATURES_SUBDIR), new File(out_dir + File.separator + FEATURES_SUBDIR));
if (new File(baseDir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME).exists())
FileUtils.copyFile(new File(baseDir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME), new File(out_dir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME));
// save the states that may get modified
List<Document> savedAllDocs = allDocs;
LabelManager oldLabelManager = getLabelManager();
// change state of the current archive -temporarily//////////
if (exportInPublicMode) {
// replace description with names;
replaceDescriptionWithNames(allDocs, this);
} else {
allDocs = new ArrayList<>(retainedDocs);
}
Set<String> retainedDocIDs = retainedDocs.stream().map(Document::getUniqueId).collect(Collectors.toSet());
LabelManager newLabelManager = getLabelManager().getLabelManagerForExport(retainedDocIDs, export_mode);
setLabelManager(newLabelManager);
// copy the index; for public (discovery) mode, also redact the body and remove the title fields
final boolean redact_body_instead_of_remove = true;
Set<String> docIdSet = new LinkedHashSet<>();
for (Document d : allDocs) docIdSet.add(d.getUniqueId());
final Set<String> retainedDocIds = docIdSet;
Indexer.FilterFunctor emailFilter = doc -> {
if (!retainedDocIds.contains(doc.get("docId")))
return false;
if (exportInPublicMode) {
String text;
if (redact_body_instead_of_remove) {
text = doc.get("body");
}
doc.removeFields("body");
doc.removeFields("body_original");
if (text != null) {
String redacted_text = IndexUtils.retainOnlyNames(text, doc);
doc.add(new Field("body", redacted_text, Indexer.full_ft));
// this uses the standard analyzer, not stemming, because redacted bodies contain only names.
}
String title = doc.get("title");
doc.removeFields("title");
if (title != null) {
String redacted_title = IndexUtils.retainOnlyNames(title, doc); // redact the title itself, not the body text
doc.add(new Field("title", redacted_title, Indexer.full_ft));
}
}
return true;
};
/*
Moving this to the end, after changing the baseDir of the archive, because the addressbook gets saved
after maskEmailDomain.
if (exportInPublicMode) {
List<Document> docs = this.getAllDocs();
List<EmailDocument> eds = new ArrayList<>();
for (Document doc : docs)
eds.add((EmailDocument) doc);
EmailUtils.maskEmailDomain(eds, this.addressBook);
}
*/
Indexer.FilterFunctor attachmentFilter = doc -> {
if (exportInPublicMode) {
return false;
}
String docId = doc.get("emailDocId");
if (docId == null) {
Integer di = Integer.parseInt(doc.get("docId"));
// don't want to print too many messages
if (di < 10)
log.error("Looks like this is an old archive, filtering all the attachments!!\n" + "Consider re-indexing with the latest version for a proper export.");
return false;
}
return retainedDocIds.contains(docId);
};
indexer.copyDirectoryWithDocFilter(out_dir, emailFilter, attachmentFilter);
log.info("Completed exporting indexes");
// save the blobs in a new blobstore
if (!exportInPublicMode) {
log.info("Starting to export blobs, old blob store is: " + blobStore);
Set<Blob> blobsToKeep = new LinkedHashSet<>();
for (Document d : allDocs) if (d instanceof EmailDocument)
if (!Util.nullOrEmpty(((EmailDocument) d).attachments))
blobsToKeep.addAll(((EmailDocument) d).attachments);
String blobsDir = out_dir + File.separatorChar + BLOBS_SUBDIR;
new File(blobsDir).mkdirs();
BlobStore newBlobStore = blobStore.createCopy(blobsDir, blobsToKeep);
log.info("Completed exporting blobs, newBlobStore in dir: " + blobsDir + " is: " + newBlobStore);
// switch to the new blob store (important -- the urls and indexes in the new blob store are different from the old one!)
blobStore = newBlobStore;
}
String oldBaseDir = baseDir;
// change base directory
setBaseDir(out_dir);
if (exportInPublicMode) {
List<Document> docs = this.getAllDocs();
List<EmailDocument> eds = new ArrayList<>();
for (Document doc : docs) eds.add((EmailDocument) doc);
EmailUtils.maskEmailDomain(eds, this.addressBook);
}
// write out the archive file
// save .session file.
SimpleSessions.saveArchive(out_dir, name, this);
log.info("Completed saving archive object");
// restore states
setBaseDir(oldBaseDir);
allDocs = savedAllDocs;
setLabelManager(oldLabelManager);
return out_dir;
}
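For context, a minimal sketch of how a caller might invoke this export; the archive instance, retained-document list, output path and session name below are assumptions, not taken from the source.
// hypothetical caller; 'archive' is an existing Archive instance
Collection<? extends Document> docsToKeep = archive.getAllDocs(); // assumption: retain everything
String exportedDir = archive.export(docsToKeep, Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY, "/tmp/discovery-export", "default"); // hypothetical out_dir and name
if (exportedDir == null)
    log.warn("export did not produce an output directory");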
use of edu.stanford.muse.datacache.BlobStore in project epadd by ePADD.
the class JSPHelper method preparedArchive.
/**
* creates a new archive and returns it
*/
public static Archive preparedArchive(HttpServletRequest request, String baseDir, List<String> extraOptions) throws IOException {
List<String> list = new ArrayList<>();
if (request != null) {
if ("yearly".equalsIgnoreCase(request.getParameter("period")))
list.add("-yearly");
if (request.getParameter("noattachments") != null)
list.add("-noattachments");
// filter params
if ("true".equalsIgnoreCase(request.getParameter("sentOnly")))
list.add("-sentOnly");
String str = request.getParameter("dateRange");
if (str != null && str.length() > 0) {
list.add("-date");
list.add(str);
}
String keywords = request.getParameter("keywords");
if (keywords != null && !keywords.equals("")) {
list.add("-keywords");
list.add(keywords);
}
String filter = request.getParameter("filter");
if (filter != null && !filter.equals("")) {
list.add("-filter");
list.add(filter);
}
// advanced options
if ("true".equalsIgnoreCase(request.getParameter("incrementalTFIDF")))
list.add("-incrementalTFIDF");
if ("true".equalsIgnoreCase(request.getParameter("NER")))
list.add("-NER");
if (!"true".equalsIgnoreCase(request.getParameter("allText")))
list.add("-noalltext");
if ("true".equalsIgnoreCase(request.getParameter("locationsOnly")))
list.add("-locationsOnly");
if ("true".equalsIgnoreCase(request.getParameter("orgsOnly")))
list.add("-orgsOnly");
if ("true".equalsIgnoreCase(request.getParameter("includeQuotedMessages")))
list.add("-includeQuotedMessages");
String subjWeight = request.getParameter("subjectWeight");
if (subjWeight != null) {
list.add("-subjectWeight");
list.add(subjWeight);
}
}
if (!Util.nullOrEmpty(extraOptions))
list.addAll(extraOptions);
String[] s = new String[list.size()];
list.toArray(s);
// careful about the ordering here.. first setup, then read indexer, then run it
Archive archive = Archive.createArchive();
BlobStore blobStore = JSPHelper.preparedBlobStore(baseDir);
archive.setup(baseDir, blobStore, s);
log.info("archive setup in " + baseDir);
return archive;
}
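A minimal sketch of calling this helper from servlet code; the base directory and the empty extra-options list are assumptions.
// hypothetical usage; 'request' is the current HttpServletRequest (it may also be null, per the checks above)
String archiveBaseDir = System.getProperty("user.home") + File.separator + "epadd-data"; // assumed location
Archive archive = JSPHelper.preparedArchive(request, archiveBaseDir, new ArrayList<>());
JSPHelper.log.info("prepared archive with blob store under " + archiveBaseDir);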
use of edu.stanford.muse.datacache.BlobStore in project epadd by ePADD.
the class EmailRenderer method getAttachmentDetails.
/*
Method to extract some key details from an attachment that need to be displayed in a fancybox in the gallery feature.
@chinmay, can we get rid of these escapeHTML and escapeJSON and URLEncode?
*/
private static JsonObject getAttachmentDetails(Archive archive, Blob attachment, Document doc) {
// prepare json object of the information. The format is
// {index:'',href:'', from:'', date:'', subject:'',filename:'',downloadURL:'',tileThumbnailURL:'',msgURL:'',info:''}
// the info field is optional and present only for those attachments that were converted or normalized during ingestion, so the user needs to be notified about them.
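// For illustration only (all values below are hypothetical, not from the source), a returned object for a pdf attachment might look like:
// {"filename":"report.pdf","filenameWithIndex":"report-1.pdf","from":"Jane Doe <jdoe@example.org>","date":"Jan 5, 2005",
//  "subject":"Quarterly report","msgURL":"browse?archiveID=...&adv-search=1&attachmentFileWithNumber=report-1.pdf",
//  "href":"images/pdf_icon.svg","downloadURL":"serveAttachment.jsp?archiveID=...&file=...","tileThumbnailURL":"images/pdf_icon.svg","size":102400}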
JsonObject result = new JsonObject();
String archiveID = ArchiveReaderWriter.getArchiveIDForArchive(archive);
// Extract mail information
// Extract a few details like sender, date, and message body (ellipsized up to some length) and put them in result.
EmailDocument ed = (EmailDocument) doc;
// Problematic cases where the converted JSON object threw errors in JS.
/*
Case 1: {"from":"Δρ. Θεόδωρος Σίμος \r\n\t- Dr. ***** (redacted)","date":"Jan 5, 2005"}
Solution: escapeJson will escape these to \\r\\n\\t.
Case 2: {"from":"李升荧","date":"Dec 19, 2012","subject":"shangwu@jxdyf.com,Please find
."}
Problem: There is a newline in the subject between "find" and ".".
Solution: escapeJson will escape that newline, which makes the string parse correctly.
*/
// escaping because the sender string may contain something like <jbush@..>.
String sender = Util.escapeHTML(ed.getFromString());
sender = Util.escapeJSON(sender);
String date = Util.escapeHTML(ed.dateString());
String subject = Util.escapeHTML(ed.description);
subject = Util.escapeJSON(subject);
String docId = ed.getUniqueId();
// for the caption of the attachment
BlobStore attachmentStore = archive.getBlobStore();
String filename = attachmentStore.full_filename_normalized(attachment, false);
// IMP: We want to open set of all those messages which have this attachment. Therefore we don't use docID to open the message.
// String messageURL = "browse?archiveID="+archiveID+"&docId=" + docId;
// Use browse?archiveID=...&adv-search=1&attachmentFilename= as the msgurl.
result.addProperty("filename", Util.escapeHTML(filename));
String numberedFileName = attachmentStore.full_filename_normalized(attachment, true);
String messageURL = "browse?archiveID=" + archiveID + "&adv-search=1&attachmentFileWithNumber=" + Util.URLEncode(numberedFileName);
result.addProperty("filenameWithIndex", numberedFileName);
result.addProperty("from", sender);
if (ed.hackyDate)
result.addProperty("date", "Undated");
else
result.addProperty("date", date);
result.addProperty("subject", subject);
result.addProperty("msgURL", messageURL);
// Extract a few details like the attachment src, thumbnail, search-for-message URL etc. and put them in result.
// tilethumbnailURL is the url of the image displayed on small tile in the gallery landing page
// thumbnailURL is the url of the image displayed in the gallery mode (inside fancybox). For now both are same but they can be made different
// later therefore the distinction here.
String thumbnailURL = null, downloadURL = null, tileThumbnailURL = null;
if (attachmentStore != null) {
String contentFileDataStoreURL = attachmentStore.get_URL_Normalized(attachment);
// IMP: We need to do URLEncode otherwise if filename contains (') then the object creation from json data fails in the frontend.
// e.g. if the file's name is Jim's
downloadURL = "serveAttachment.jsp?archiveID=" + archiveID + "&file=" + Util.URLEncode(Util.URLtail(contentFileDataStoreURL));
String tnFileDataStoreURL = attachmentStore.getViewURL(attachment, "tn");
if (tnFileDataStoreURL != null) {
thumbnailURL = "serveAttachment.jsp?archiveID=" + archiveID + "&file=" + Util.URLEncode(Util.URLtail(tnFileDataStoreURL));
// set the tile's thumbnail (on the gallery landing page) to the same URL.
tileThumbnailURL = thumbnailURL;
} else {
if (archive.getBlobStore().is_image(attachment)) {
// maybe wait for the day when both Chrome and Firefox start supporting them (TIFFs are not previewable in browsers today).
if (Util.getExtension(contentFileDataStoreURL).equals("tif")) {
// handle it like non-previewable file.
thumbnailURL = "images/tiff_icon.svg";
tileThumbnailURL = "images/tiff_icon.svg";
} else {
thumbnailURL = downloadURL;
// maybe we need to reduce its size. @TODO
tileThumbnailURL = thumbnailURL;
}
} else if (Util.is_pdf_filename(contentFileDataStoreURL)) {
// pdfs are also treated as docs, so better to keep this check first.
// thumbnailURL of a pdf can be a pdf image @TODO
thumbnailURL = "images/pdf_icon.svg";
tileThumbnailURL = "images/pdf_icon.svg";
} else if (Util.is_ppt_filename(contentFileDataStoreURL)) {
// same for ppt
// thumbnailURL of a ppt can be a ppt image @TODO
thumbnailURL = "images/ppt_icon.svg";
tileThumbnailURL = "images/ppt_icon.svg";
} else if (Util.is_doc_filename(contentFileDataStoreURL)) {
// thumbnailURL of a doc can be a doc image @TODO
thumbnailURL = "images/doc_icon.svg";
tileThumbnailURL = "images/doc_icon.svg";
} else if (Util.is_zip_filename(contentFileDataStoreURL)) {
// thumbnailURL of a zip can be a zip image @TODO
thumbnailURL = "images/zip_icon.svg";
tileThumbnailURL = "images/zip_icon.svg";
} else {
thumbnailURL = "images/large_sorry_img.svg";
tileThumbnailURL = "images/large_sorry_img.svg";
}
}
} else
JSPHelper.log.warn("attachments store is null!");
if (thumbnailURL == null)
thumbnailURL = "images/large_sorry_img.svg";
// downloadURL should never be null.
boolean isNormalized = attachmentStore.isNormalized(attachment);
boolean isCleanedName = attachmentStore.isCleaned(attachment);
String cleanupurl = attachmentStore.get_URL_Cleanedup(attachment);
String info = "";
if (isNormalized || isCleanedName) {
String completeurl_cleanup = "serveAttachment.jsp?archiveID=" + archiveID + "&file=" + Util.URLEncode(Util.URLtail(cleanupurl));
if (isNormalized) {
info = "This file was converted during the preservation process. Its original name was " + attachmentStore.full_filename_original(attachment, false) + ". Click <a href=" + completeurl_cleanup + ">here </a> to download the original file";
} else if (isCleanedName) {
info = "This file name was cleaned up during the preservation process. The original file name was " + attachmentStore.full_filename_original(attachment, false);
}
}
// {index:'',href:'', from:'', date:'', subject:'',filename:'',downloadURL:'',tileThumbnailURL:'',info:'',size:''}
result.addProperty("size", attachment.size);
result.addProperty("href", thumbnailURL);
result.addProperty("downloadURL", downloadURL);
result.addProperty("tileThumbnailURL", tileThumbnailURL);
// add this field only if it is non-empty (that is the beauty of json: a non-fixed structure for the data)
if (!Util.nullOrEmpty(info))
result.addProperty("info", info);
return result;
}
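A hedged sketch of how this private helper might be driven from elsewhere in EmailRenderer to build a gallery payload; the surrounding 'archive' and 'ed' variables are assumptions, and JsonArray is assumed to be Gson's, on the classpath alongside JsonObject.
// hypothetical caller; 'ed' is an EmailDocument whose attachments should be shown in the gallery
JsonArray gallery = new JsonArray();
if (!Util.nullOrEmpty(ed.attachments))
    for (Blob attachment : ed.attachments)
        gallery.add(getAttachmentDetails(archive, attachment, ed));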
use of edu.stanford.muse.datacache.BlobStore in project epadd by ePADD.
the class JSPHelper method preparedBlobStore.
/*
* creates a new blob store object from the given location (may already
* exist) and returns it
*/
private static BlobStore preparedBlobStore(String baseDir) throws IOException {
// always set up attachmentsStore even if we are not fetching attachments
// because the user may already have stuff in it -- if so, we should make it available.
String attachmentsStoreDir = baseDir + File.separatorChar + Archive.BLOBS_SUBDIR + File.separator;
BlobStore attachmentsStore;
try {
File f = new File(attachmentsStoreDir);
// the return value is not relevant
f.mkdirs();
if (!f.exists() || !f.isDirectory() || !f.canWrite())
throw new IOException("Unable to create directory for writing: " + attachmentsStoreDir);
attachmentsStore = new BlobStore(attachmentsStoreDir);
} catch (IOException ioe) {
log.error("MAJOR ERROR: Disabling attachments because unable to initialize attachments store in directory: " + attachmentsStoreDir + " :" + ioe + " " + Util.stackTrace(ioe));
attachmentsStore = null;
throw (ioe);
}
return attachmentsStore;
}
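Since this helper is private, external callers would normally go through preparedArchive above; a stand-alone sketch, assuming only the BlobStore constructor and directory layout seen here, with a hypothetical base directory.
// hypothetical stand-alone setup of a blob store under an archive's base directory
String dir = "/tmp/epadd-archive" + File.separatorChar + Archive.BLOBS_SUBDIR + File.separator; // assumed path
new File(dir).mkdirs();
BlobStore store = new BlobStore(dir); // throws IOException if the directory cannot be used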
use of edu.stanford.muse.datacache.BlobStore in project epadd by ePADD.
the class MuseEmailFetcher method fetchAndIndexEmails.
/**
* Key method to fetch actual email messages. Can take a long time.
* @param selectedFolders is in the format <account name>^-^<folder name>
* @param session is used only to put a status object in; can be null, in which case no status object is set
* Results are reflected in the archive's emailDocs, addressBook and blobstore.
* @throws NoDefaultFolderException
*/
public void fetchAndIndexEmails(Archive archive, String[] selectedFolders, boolean useDefaultFolders, FetchConfig fetchConfig, HttpSession session, Consumer<StatusProvider> setStatusProvider) throws InterruptedException, JSONException, NoDefaultFolderException, CancelledException {
setupFetchers(-1);
long startTime = System.currentTimeMillis();
setStatusProvider.accept(new StaticStatusProvider("Starting to process messages..."));
// if (session != null)
// session.setAttribute("statusProvider", new StaticStatusProvider("Starting to process messages..."));
boolean op_cancelled = false, out_of_mem = false;
BlobStore attachmentsStore = archive.getBlobStore();
fetchConfig.downloadAttachments = fetchConfig.downloadAttachments && attachmentsStore != null;
if (Util.nullOrEmpty(fetchers)) {
log.warn("Trying to fetch email with no fetchers, setup not called ?");
return;
}
setupFoldersForFetchers(fetchers, selectedFolders, useDefaultFolders);
List<FolderInfo> fetchedFolderInfos = new ArrayList<>();
// one fetcher will aggregate everything
FetchStats stats = new FetchStats();
MTEmailFetcher aggregatingFetcher = null;
// a fetcher is one source, like an account or a top-level mbox dir. A fetcher could include multiple folders.
long startTimeMillis = System.currentTimeMillis();
for (MTEmailFetcher fetcher : fetchers) {
// in theory, different iterations of this loop could be run in parallel ("archive" access will be synchronized)
setStatusProvider.accept(fetcher);
/*if (session != null)
session.setAttribute("statusProvider", fetcher);
*/
fetcher.setArchive(archive);
fetcher.setFetchConfig(fetchConfig);
log.info("Memory status before fetching emails: " + Util.getMemoryStats());
// this is the big call, can run for a long time. Note: running in the same thread, it's not fetcher.start();
List<FolderInfo> foldersFetchedByThisFetcher = fetcher.run();
// but don't abort immediately, only at the end, after addressbook has been built for at least the processed messages
if (fetcher.isCancelled()) {
log.info("NOTE: fetcher operation was cancelled");
op_cancelled = true;
break;
}
if (fetcher.mayHaveRunOutOfMemory()) {
log.warn("Fetcher operation ran out of memory " + fetcher);
out_of_mem = true;
break;
}
fetchedFolderInfos.addAll(foldersFetchedByThisFetcher);
if (aggregatingFetcher == null && !Util.nullOrEmpty(foldersFetchedByThisFetcher))
// first non-empty fetcher
aggregatingFetcher = fetcher;
if (aggregatingFetcher != null)
aggregatingFetcher.merge(fetcher);
// add the indexed folders to the stats
EmailStore store = fetcher.getStore();
String fetcherDescription = store.displayName + ":" + store.emailAddress;
for (FolderInfo fi : fetchedFolderInfos) stats.selectedFolders.add(new Pair<>(fetcherDescription, fi));
}
if (op_cancelled)
throw new CancelledException();
if (out_of_mem)
throw new OutOfMemoryError();
if (aggregatingFetcher != null) {
stats.importStats = aggregatingFetcher.stats;
if (aggregatingFetcher.mayHaveRunOutOfMemory())
throw new OutOfMemoryError();
}
// save memory
aggregatingFetcher = null;
long endTimeMillis = System.currentTimeMillis();
long elapsedMillis = endTimeMillis - startTimeMillis;
log.info(elapsedMillis + " ms for fetch+index, Memory status: " + Util.getMemoryStats());
// note: this is all archive docs, not just the ones that may have been just imported
List<EmailDocument> allEmailDocs = (List) archive.getAllDocs();
archive.addFetchedFolderInfos(fetchedFolderInfos);
if (allEmailDocs.size() == 0)
log.warn("0 messages from email fetcher");
// EmailUtils.cleanDates(allEmailDocs);
// create a new address book
// if (session != null)
// session.setAttribute("statusProvider", new StaticStatusProvider("Building address book..."));
setStatusProvider.accept(new StaticStatusProvider("Building address book..."));
AddressBook addressBook = EmailDocument.buildAddressBook(allEmailDocs, archive.ownerEmailAddrs, archive.ownerNames);
log.info("Address book created!!");
log.info("Address book stats: " + addressBook.getStats());
// if (session != null)
// session.setAttribute("statusProvider", new StaticStatusProvider("Finishing up..."));
setStatusProvider.accept(new StaticStatusProvider("Finishing up..."));
archive.setAddressBook(addressBook);
// we shouldn't really have dups now because the archive ensures that only unique docs are added
// move sorting to archive.postprocess?
EmailUtils.removeDupsAndSort(allEmailDocs);
// report stats
stats.lastUpdate = new Date().getTime();
// For issue #254.
stats.archiveOwnerInput = name;
stats.archiveTitleInput = archiveTitle;
stats.primaryEmailInput = alternateEmailAddrs;
stats.emailSourcesInput = emailSources;
// ////
// (String) JSPHelper.getSessionAttribute(session, "userKey");
stats.userKey = "USER KEY UNUSED";
stats.fetchAndIndexTimeMillis = elapsedMillis;
updateStats(archive, addressBook, stats);
// if (session != null)
// session.removeAttribute("statusProvider");
log.info("Fetch+index complete: " + Util.commatize(System.currentTimeMillis() - startTime) + " ms");
}
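A hedged sketch of invoking this method; the no-argument constructors, the FetchConfig field access and the folder spec are assumptions beyond what the snippet itself shows, and a real caller must also handle the declared checked exceptions.
// hypothetical invocation; assumes the fetcher's email sources were configured earlier (setupFetchers is called internally)
MuseEmailFetcher muse = new MuseEmailFetcher(); // assumed default constructor
FetchConfig fc = new FetchConfig(); // assumed default constructor
fc.downloadAttachments = true; // field referenced above; effective only if the archive has a blob store
String[] folders = { "account1^-^INBOX" }; // format per the javadoc: <account name>^-^<folder name>
muse.fetchAndIndexEmails(archive, folders, false, fc, null, sp -> { /* no-op status consumer */ });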