Search in sources :

Example 21 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class ArchiveReaderWriter method recalculateCollectionMetadata.

/**
 * Recomputes the summary statistics stored in {@code archive.collectionMetadata}
 * from the archive's current documents, creating the metadata object if it is absent.
 *
 * <p>Only {@link EmailDocument}s contribute to the date range, message and attachment
 * counts. Messages flagged with {@code hackyDate} are counted in {@code nHackyDates}
 * and excluded from the first/last date range. The outgoing message count is taken
 * from the address book ({@code getMsgsSentByOwner()}), not from a per-message tally.
 *
 * @param archive the archive whose collection metadata is refreshed in place
 */
public static void recalculateCollectionMetadata(Archive archive) {
    if (archive.collectionMetadata == null)
        archive.collectionMetadata = new Archive.CollectionMetadata();
    archive.collectionMetadata.timestamp = new Date().getTime();
    archive.collectionMetadata.tz = TimeZone.getDefault().getID();
    archive.collectionMetadata.nDocs = archive.getAllDocs().size();
    int totalAttachments = 0, images = 0, docs = 0, others = 0, receivedMessages = 0, hackyDates = 0;
    Date firstDate = null, lastDate = null;
    for (Document d : archive.getAllDocs()) {
        if (!(d instanceof EmailDocument))
            continue;
        EmailDocument ed = (EmailDocument) d;
        if (ed.date != null) {
            if (ed.hackyDate)
                hackyDates++;
            else {
                // only trustworthy dates take part in the collection's date range
                if (firstDate == null || ed.date.before(firstDate))
                    firstDate = ed.date;
                if (lastDate == null || ed.date.after(lastDate))
                    lastDate = ed.date;
            }
        }
        int sentOrReceived = ed.sentOrReceived(archive.addressBook);
        if ((sentOrReceived & EmailDocument.RECEIVED_MASK) != 0)
            receivedMessages++;
        if (!Util.nullOrEmpty(ed.attachments)) {
            totalAttachments += ed.attachments.size();
            for (Blob b : ed.attachments) {
                // resolve the normalized name once per blob instead of once per check
                String normalizedName = archive.getBlobStore().get_URL_Normalized(b);
                if (Util.nullOrEmpty(normalizedName))
                    // unnamed blobs count towards nBlobs but not towards any category
                    continue;
                if (Util.is_image_filename(normalizedName))
                    images++;
                else if (Util.is_doc_filename(normalizedName))
                    docs++;
                else
                    others++;
            }
        }
    }
    archive.collectionMetadata.firstDate = firstDate;
    archive.collectionMetadata.lastDate = lastDate;
    // Fill in the current locale and unix timestamp values for firstDate and lastDate. For more information on these variables refer to the
    // class definition of CollectionMetaData.
    archive.collectionMetadata.setIngestionLocaleTag(Locale.getDefault().toLanguageTag());
    if (firstDate != null)
        archive.collectionMetadata.firstDateTS = firstDate.getTime();
    if (lastDate != null)
        archive.collectionMetadata.lastDateTS = lastDate.getTime();
    archive.collectionMetadata.nIncomingMessages = receivedMessages;
    archive.collectionMetadata.nOutgoingMessages = archive.getAddressBook().getMsgsSentByOwner();
    archive.collectionMetadata.nHackyDates = hackyDates;
    archive.collectionMetadata.nBlobs = totalAttachments;
    archive.collectionMetadata.nUniqueBlobs = archive.blobStore.uniqueBlobs.size();
    archive.collectionMetadata.nImageBlobs = images;
    archive.collectionMetadata.nDocBlobs = docs;
    archive.collectionMetadata.nOtherBlobs = others;
}
Also used : Blob(edu.stanford.muse.datacache.Blob)

Example 22 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class MuseEmailFetcher method updateStats.

/**
 * Fills in {@code stats} for the given archive and registers it with the archive.
 * Collected figures: message count, first/last message time, the fetcher's data
 * errors, and human-readable reports (plus approximate space savings) for duplicate
 * messages and duplicate attachments.
 *
 * <p>this should probably move to archive.java
 *
 * @param archive     archive whose documents, duplicate-message info and blob store are inspected
 * @param addressBook address book used to classify each message as sent and/or received
 * @param stats       stats object that is populated and then passed to {@code archive.addStats}
 */
private void updateStats(Archive archive, AddressBook addressBook, FetchStats stats) {
    Collection<EmailDocument> allEmailDocs = (Collection) archive.getAllDocs();
    // the rest of this is basically stats collection
    int nSent = 0, nReceived = 0;
    for (EmailDocument ed : allEmailDocs) {
        Pair<Boolean, Boolean> p = addressBook.isSentOrReceived(ed.getToCCBCC(), ed.from);
        boolean sent = p.getFirst();
        boolean received = p.getSecond();
        if (sent)
            nSent++;
        if (received)
            nReceived++;
    }
    stats.dataErrors = getDataErrors();
    stats.nMessagesInArchive = allEmailDocs.size();
    /* compute stats for time range */
    if (allEmailDocs.size() > 0) {
        Pair<Date, Date> p = EmailUtils.getFirstLast(allEmailDocs);
        stats.firstMessageDate = p.getFirst() == null ? 0 : p.getFirst().getTime();
        stats.lastMessageDate = p.getSecond() == null ? 0 : p.getSecond().getTime();
    }
    // add stat for the duplicate messages that is stored in dupMessageInfo field of archive and is filled by MuseEmailFetcher while fetching messages..
    // the errors of duplicates need to be properly formatted using the map dupmessageinfo
    long sizeSavedFromDupMessages = 0;
    long sizeSavedFromDupAttachments = 0;
    Collection<String> dupMessages = new LinkedHashSet<>();
    for (Document doc : archive.getDupMessageInfo().keySet()) {
        EmailDocument edoc = (EmailDocument) doc;
        StringBuilder sb = new StringBuilder();
        // number of duplicates found for this emaildocument
        int numofduplicates = archive.getDupMessageInfo().get(doc).size();
        sb.append("Duplicate message:").append(" Following messages were found as duplicates of\n    message id #").append(edoc.getUniqueId()).append(" (").append(edoc.folderName).append("):\n");
        // get the size of attachments; a message without attachments may carry a null list
        long totalsize = 0;
        if (edoc.attachments != null) {
            for (Blob b : edoc.attachments) {
                totalsize += b.size;
            }
        }
        // every duplicate would have stored the same attachments again
        long sizesaved = numofduplicates * totalsize;
        int count = 1;
        for (Tuple2 s : archive.getDupMessageInfo().get(doc)) {
            sb.append("      ").append(count).append(". ").append("Message id # ").append(s.getSecond()).append(" (").append(s.getFirst()).append(")\n");
            count++;
        }
        if (sizesaved != 0) {
            sb.append("***** Saved ").append(sizesaved).append(" bytes by detecting these duplicates\n");
            sizeSavedFromDupMessages += sizesaved;
        }
        dupMessages.add(sb.toString());
    }
    stats.dataErrors.addAll(dupMessages);
    // also add stat for blobstore
    Collection<String> dupBlobMessages = new LinkedHashSet<>();
    Map<Blob, Integer> dupblobs = archive.getBlobStore().getDupBlobCount();
    for (Map.Entry<Blob, Integer> dup : dupblobs.entrySet()) {
        Blob b = dup.getKey();
        // widen before multiplying so a large blob times its duplicate count cannot wrap around
        long saved = (long) dup.getValue() * b.size;
        dupBlobMessages.add("Duplicate attachments:" + dup.getValue() + " duplicate attachments found of " + archive.getBlobStore().full_filename_normalized(b) + ". Total space saved by not storing these duplicates is " + saved + " bytes\n");
        sizeSavedFromDupAttachments += saved;
    }
    stats.dataErrors.addAll(dupBlobMessages);
    // reported in units of 1000 bytes
    stats.spaceSavingFromDupMessageDetection = sizeSavedFromDupMessages / 1000;
    stats.spaceSavingFromDupAttachmentDetection = sizeSavedFromDupAttachments / 1000;
    archive.addStats(stats);
    log.info("Fetcher stats: " + stats);
}
Also used : Blob(edu.stanford.muse.datacache.Blob) EmailDocument(edu.stanford.muse.index.EmailDocument) Document(edu.stanford.muse.index.Document) EmailDocument(edu.stanford.muse.index.EmailDocument) Tuple2(groovy.lang.Tuple2)

Example 23 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class Archive method export.

/**
 * a fresh archive is created under out_dir. name is the name of the session
 * under it. blobs are exported into this archive dir. destructive! but
 * should be so only in memory. original files on disk should be unmodified.
 *
 * <p>While exporting, this archive's document list, label manager and base directory
 * are temporarily replaced; they are restored before returning, also when the export
 * fails with an exception. In non-public modes the blob store is switched to the newly
 * exported copy and is NOT switched back.
 *
 * @param retainedDocs documents to keep in the exported archive
 * @param export_mode  export mode; {@code EXPORT_PROCESSING_TO_DISCOVERY} is the public mode
 *                     (bodies and titles redacted to names, no attachments, no lexicons, masked email domains)
 * @param out_dir      directory to export into; an existing directory is deleted first
 * @param name         session name used for the saved archive file
 * @return {@code out_dir} on success, or {@code null} if {@code out_dir} is empty or cannot be created
 * @throws Exception if copying files, filtering the index, copying blobs or saving the archive fails
 */
public synchronized String export(Collection<? extends Document> retainedDocs, Export_Mode export_mode, String out_dir, String name) throws Exception {
    if (Util.nullOrEmpty(out_dir))
        return null;
    File dir = new File(out_dir);
    if (dir.exists() && dir.isDirectory()) {
        log.warn("Overwriting existing directory '" + out_dir + "' (it may already exist)");
        FileUtils.deleteDirectory(dir);
    } else if (!dir.mkdirs()) {
        log.warn("Unable to create directory: " + out_dir);
        return null;
    }
    boolean exportInPublicMode = export_mode == Export_Mode.EXPORT_PROCESSING_TO_DISCOVERY;
    Archive.prepareBaseDir(out_dir);
    // lexicons are not shipped with a public-mode export
    File lexiconsDir = new File(baseDir + File.separator + LEXICONS_SUBDIR);
    if (!exportInPublicMode && lexiconsDir.exists())
        FileUtils.copyDirectory(lexiconsDir, new File(out_dir + File.separator + LEXICONS_SUBDIR));
    File imagesDir = new File(baseDir + File.separator + IMAGES_SUBDIR);
    if (imagesDir.exists())
        FileUtils.copyDirectory(imagesDir, new File(out_dir + File.separator + IMAGES_SUBDIR));
    // internal disambiguation cache
    File featuresDir = new File(baseDir + File.separator + FEATURES_SUBDIR);
    if (featuresDir.exists())
        FileUtils.copyDirectory(featuresDir, new File(out_dir + File.separator + FEATURES_SUBDIR));
    File authorityFile = new File(baseDir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME);
    if (authorityFile.exists())
        FileUtils.copyFile(authorityFile, new File(out_dir + File.separator + edu.stanford.muse.Config.AUTHORITY_ASSIGNER_FILENAME));
    // save the states that may get modified
    List<Document> savedAllDocs = allDocs;
    LabelManager oldLabelManager = getLabelManager();
    String oldBaseDir = baseDir;
    boolean baseDirSwitched = false;
    try {
        // change state of the current archive -temporarily//////////
        if (exportInPublicMode) {
            // replace description with names;
            replaceDescriptionWithNames(allDocs, this);
        } else {
            allDocs = new ArrayList<>(retainedDocs);
        }
        Set<String> labelDocIds = retainedDocs.stream().map(Document::getUniqueId).collect(Collectors.toSet());
        LabelManager newLabelManager = getLabelManager().getLabelManagerForExport(labelDocIds, export_mode);
        setLabelManager(newLabelManager);
        // copy index and if for public mode, also redact body and remove title
        // fields
        final boolean redact_body_instead_of_remove = true;
        // ids of the documents currently in allDocs; used by both index filters below
        final Set<String> retainedDocIds = new LinkedHashSet<>();
        for (Document d : allDocs) retainedDocIds.add(d.getUniqueId());
        Indexer.FilterFunctor emailFilter = doc -> {
            if (!retainedDocIds.contains(doc.get("docId")))
                return false;
            if (exportInPublicMode) {
                String text = redact_body_instead_of_remove ? doc.get("body") : null;
                doc.removeFields("body");
                doc.removeFields("body_original");
                if (text != null) {
                    String redacted_text = IndexUtils.retainOnlyNames(text, doc);
                    // this uses standard analyzer, not stemming because redacted bodys only have names.
                    doc.add(new Field("body", redacted_text, Indexer.full_ft));
                }
                String title = doc.get("title");
                doc.removeFields("title");
                if (title != null) {
                    // redact the title itself; previously the body text was passed here by mistake
                    String redacted_title = IndexUtils.retainOnlyNames(title, doc);
                    doc.add(new Field("title", redacted_title, Indexer.full_ft));
                }
            }
            return true;
        };
        Indexer.FilterFunctor attachmentFilter = doc -> {
            if (exportInPublicMode) {
                // attachments are never exported in public mode
                return false;
            }
            String docId = doc.get("emailDocId");
            if (docId == null) {
                Integer di = Integer.parseInt(doc.get("docId"));
                // don't want to print too many messages
                if (di < 10)
                    log.error("Looks like this is an old archive, filtering all the attachments!!\n" + "Consider re-indexing with the latest version for a proper export.");
                return false;
            }
            return retainedDocIds.contains(docId);
        };
        indexer.copyDirectoryWithDocFilter(out_dir, emailFilter, attachmentFilter);
        log.info("Completed exporting indexes");
        // save the blobs in a new blobstore
        if (!exportInPublicMode) {
            log.info("Starting to export blobs, old blob store is: " + blobStore);
            Set<Blob> blobsToKeep = new LinkedHashSet<>();
            for (Document d : allDocs) {
                if (d instanceof EmailDocument && !Util.nullOrEmpty(((EmailDocument) d).attachments))
                    blobsToKeep.addAll(((EmailDocument) d).attachments);
            }
            String blobsDir = out_dir + File.separatorChar + BLOBS_SUBDIR;
            new File(blobsDir).mkdirs();
            BlobStore newBlobStore = blobStore.createCopy(blobsDir, blobsToKeep);
            log.info("Completed exporting blobs, newBlobStore in dir: " + blobsDir + " is: " + newBlobStore);
            // switch to the new blob store (important -- the urls and indexes in the new blob store are different from the old one!)
            blobStore = newBlobStore;
        }
        // change base directory
        baseDirSwitched = true;
        setBaseDir(out_dir);
        // Domain masking happens only after the base dir change because the address book
        // is saved after maskEmailDomain.
        if (exportInPublicMode) {
            List<Document> docs = this.getAllDocs();
            List<EmailDocument> eds = new ArrayList<>();
            for (Document doc : docs) eds.add((EmailDocument) doc);
            EmailUtils.maskEmailDomain(eds, this.addressBook);
        }
        // write out the archive file
        // save .session file.
        SimpleSessions.saveArchive(out_dir, name, this);
        log.info("Completed saving archive object");
    } finally {
        // restore states, even if the export failed part-way
        if (baseDirSwitched)
            setBaseDir(oldBaseDir);
        allDocs = savedAllDocs;
        setLabelManager(oldLabelManager);
    }
    return out_dir;
}
Also used : edu.stanford.muse.util(edu.stanford.muse.util) ParseException(org.apache.lucene.queryparser.classic.ParseException) Config(edu.stanford.muse.Config) java.util(java.util) Blob(edu.stanford.muse.datacache.Blob) AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager) NameInfo(edu.stanford.muse.ie.NameInfo) SimpleDateFormat(java.text.SimpleDateFormat) Multimap(com.google.common.collect.Multimap) LabelManager(edu.stanford.muse.LabelManager.LabelManager) Gson(com.google.gson.Gson) edu.stanford.muse.email(edu.stanford.muse.email) CorrespondentAuthorityMapper(edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper) EntityBook(edu.stanford.muse.ie.variants.EntityBook) SimpleSessions(edu.stanford.muse.webapp.SimpleSessions) LinkedHashMultimap(com.google.common.collect.LinkedHashMultimap) EmailRenderer(edu.stanford.muse.webapp.EmailRenderer) BlobStore(edu.stanford.muse.datacache.BlobStore) AddressBook(edu.stanford.muse.AddressBookManager.AddressBook) DateTime(org.joda.time.DateTime) FileUtils(org.apache.commons.io.FileUtils) Label(edu.stanford.muse.LabelManager.Label) Collectors(java.util.stream.Collectors) Contact(edu.stanford.muse.AddressBookManager.Contact) Stream(java.util.stream.Stream) java.io(java.io) NER(edu.stanford.muse.ner.NER) Field(org.apache.lucene.document.Field) NEType(edu.stanford.muse.ner.model.NEType) Log(org.apache.commons.logging.Log) LogFactory(org.apache.commons.logging.LogFactory) ModeConfig(edu.stanford.muse.webapp.ModeConfig) JSONArray(org.json.JSONArray) Blob(edu.stanford.muse.datacache.Blob) Field(org.apache.lucene.document.Field) LabelManager(edu.stanford.muse.LabelManager.LabelManager) BlobStore(edu.stanford.muse.datacache.BlobStore)

Example 24 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class SimpleSessions method saveArchive.

/**
 * saves the archive in the current session to the cachedir. note: no blobs saved.
 *
 * <p>Before writing, the archive's collection metadata is refreshed from its documents
 * (counts, date range, attachment categories). After the serialized archive, the address
 * book, entity book, correspondent authority mapper, label manager, annotations and the
 * collection metadata are saved through their own writers. Finally the archive is closed
 * and re-opened for reading.
 *
 * @param baseDir directory under which the sessions subdirectory lives
 * @param name    session name; the file is {@code name + SESSION_SUFFIX} in the sessions subdirectory
 * @param archive archive to save
 * @return {@code true} if the serialized archive file was written, {@code false} if that write
 *         failed (the failure is logged and the remaining artifacts are still saved)
 * @throws IOException declared by the signature; failures while serializing the archive itself
 *         are caught and reported through the return value
 */
public static boolean saveArchive(String baseDir, String name, Archive archive) throws IOException {
    log.info("Before saving the archive checking if it is still in good shape");
    archive.Verify();
    String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
    // just to be safe
    new File(dir).mkdirs();
    String filename = dir + File.separatorChar + name + SimpleSessions.SESSION_SUFFIX;
    log.info("Saving archive to (session) file " + filename);
    if (archive.collectionMetadata == null)
        archive.collectionMetadata = new Archive.CollectionMetadata();
    archive.collectionMetadata.timestamp = new Date().getTime();
    archive.collectionMetadata.tz = TimeZone.getDefault().getID();
    archive.collectionMetadata.nDocs = archive.getAllDocs().size();
    int totalAttachments = 0, images = 0, docs = 0, others = 0, sentMessages = 0, receivedMessages = 0, hackyDates = 0;
    Date firstDate = null, lastDate = null;
    for (Document d : archive.getAllDocs()) {
        if (!(d instanceof EmailDocument))
            continue;
        EmailDocument ed = (EmailDocument) d;
        if (ed.date != null) {
            if (ed.hackyDate)
                hackyDates++;
            else {
                // only trustworthy dates take part in the collection's date range
                if (firstDate == null || ed.date.before(firstDate))
                    firstDate = ed.date;
                if (lastDate == null || ed.date.after(lastDate))
                    lastDate = ed.date;
            }
        }
        int sentOrReceived = ed.sentOrReceived(archive.addressBook);
        if ((sentOrReceived & EmailDocument.SENT_MASK) != 0)
            sentMessages++;
        if ((sentOrReceived & EmailDocument.RECEIVED_MASK) != 0)
            receivedMessages++;
        if (!Util.nullOrEmpty(ed.attachments)) {
            totalAttachments += ed.attachments.size();
            for (Blob b : ed.attachments) {
                if (Util.nullOrEmpty(b.filename))
                    // unnamed blobs count towards nBlobs but not towards any category
                    continue;
                if (Util.is_image_filename(b.filename))
                    images++;
                else if (Util.is_doc_filename(b.filename))
                    docs++;
                else
                    others++;
            }
        }
    }
    archive.collectionMetadata.firstDate = firstDate;
    archive.collectionMetadata.lastDate = lastDate;
    archive.collectionMetadata.nIncomingMessages = receivedMessages;
    archive.collectionMetadata.nOutgoingMessages = sentMessages;
    archive.collectionMetadata.nHackyDates = hackyDates;
    archive.collectionMetadata.nBlobs = totalAttachments;
    archive.collectionMetadata.nUniqueBlobs = archive.blobStore.uniqueBlobs.size();
    archive.collectionMetadata.nImageBlobs = images;
    archive.collectionMetadata.nDocBlobs = docs;
    archive.collectionMetadata.nOtherBlobs = others;
    // remember whether the main archive file made it to disk so the caller can be told
    boolean archiveWritten = true;
    try (ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(new FileOutputStream(filename)))) {
        oos.writeObject("archive");
        oos.writeObject(archive);
    } catch (Exception e1) {
        Util.print_exception("Failed to write archive: ", e1, log);
        archiveWritten = false;
    }
    // Now write modular transient fields to separate files-
    // By Dec 2017 there are three transient fields which will be saved and loaded separately
    // 1. AddressBook -- Stored in a gzip file in the same directory as the archive.
    // 2. EntityBook
    // 3. CorrespondentAuthorityMapper
    // Before final release of v5 in Feb 2018, modularize annotation out of archive.
    // ///////////////AddressBook Writing -- In human readable form ///////////////////////////////////
    SimpleSessions.saveAddressBook(archive);
    // //////////////EntityBook Writing -- In human readable form/////////////////////////////////////
    SimpleSessions.saveEntityBook(archive);
    // /////////////CAuthorityMapper Writing-- Serialized///////////////////////////////
    SimpleSessions.saveCorrespondentAuthorityMapper(archive);
    // ////////////LabelManager Writing -- Serialized//////////////////////////////////
    SimpleSessions.saveLabelManager(archive);
    // ////////////AnnotationManager writing-- In human readable form/////////////////////////////////////
    SimpleSessions.saveAnnotations(archive);
    writeCollectionMetadata(archive.collectionMetadata, baseDir);
    archive.close();
    // re-open for reading
    archive.openForRead();
    // note: no need of saving archive authorities separately -- they are already saved as part of the archive object
    return archiveWritten;
}
Also used : CollectionMetadata(edu.stanford.muse.index.Archive.CollectionMetadata) Blob(edu.stanford.muse.datacache.Blob) GZIPOutputStream(java.util.zip.GZIPOutputStream) EmailDocument(edu.stanford.muse.index.EmailDocument) Document(edu.stanford.muse.index.Document) EmailDocument(edu.stanford.muse.index.EmailDocument) ParseException(org.apache.lucene.queryparser.classic.ParseException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException)

Example 25 with Blob

use of edu.stanford.muse.datacache.Blob in project epadd by ePADD.

the class EmailRenderer method htmlAndJSForAttachments.

/**
 * this method returns html and js to be injected when a particular year is reached in browseAttachments.jspf
 * These variables are then used to setup fancy box for attachment browsing process.
 *
 * <p>Only attachments of {@link EmailDocument}s dated in {@code year} are considered. An attachment
 * is dropped when its (lower-cased) extension matches {@code EmailRenderer.EXCLUDED_EXT}, or when
 * a set of extensions of interest was derived from {@code queryparams} and it is not in that set.
 * Attachments sharing the same {@code filenameWithIndex} are reported once, with the number of
 * messages they occur in.
 *
 * @param docs               candidate documents; non-email documents and emails without a date are skipped
 * @param year               calendar year whose attachments are rendered
 * @param isHackyYearPresent when true and {@code year == 1960}, the heading reads "undated messages"
 * @param searchResult       supplies the archive (and through it the blob store)
 * @param queryparams        request parameters from which the attachment types of interest are derived
 * @return a pair of (HTML skeleton for tile/list views, JS snippet defining {@code attachmentDetails})
 */
public static Pair<String, String> htmlAndJSForAttachments(List<Document> docs, int year, boolean isHackyYearPresent, SearchResult searchResult, Multimap<String, String> queryparams) {
    JSPHelper.log.debug("Generating HTML for attachments in year: " + year);
    Pattern pattern = null;
    try {
        pattern = Pattern.compile(EmailRenderer.EXCLUDED_EXT);
    } catch (Exception e) {
        // reported but not fatal: without a pattern no extension is excluded below
        Util.report_exception(e);
    }
    Archive archive = searchResult.getArchive();
    // get the set of attachments types that the user is interested in.
    Set<String> attachmentTypesOfInterest = IndexUtils.getAttachmentExtensionsOfInterest(queryparams);
    Map<Document, List<Blob>> docAttachmentMap = new LinkedHashMap<>();
    int numAttachments = 0;
    // step 1: get the set of attachments and their number for docs in year.
    for (Document doc : docs) {
        if (!(doc instanceof EmailDocument))
            continue;
        // for email docs, 1 doc = 1 page
        EmailDocument ed = (EmailDocument) doc;
        if (ed.date == null)
            // a message without any date cannot belong to the requested year
            continue;
        // collect attachments only if the year of this document is same as passed argument 'year'
        // should set timezone too.. @TODO
        Calendar calendar = new GregorianCalendar();
        calendar.setTime(ed.date);
        if (calendar.get(Calendar.YEAR) != year)
            continue;
        // prepare a list of attachments (not set) keeping possible repetition but also count unique attachments here.
        // NOTE: Don't put all attachments present here. We want to display only the attachments of interest to the user otherwise user will see all
        // the attachments present in a message that had at least one attachment type of interest. This was causing confusion to the users.
        List<Blob> attachmentsOfInterest = new LinkedList<>();
        if (ed.attachments != null) {
            for (Blob b : ed.attachments) {
                String ext = Util.getExtension(archive.getBlobStore().get_URL_Normalized(b));
                if (ext == null)
                    ext = "Unidentified";
                ext = ext.toLowerCase();
                // don't consider any attachment whose extension matches the exclusion pattern
                if (pattern != null && pattern.matcher(ext).find())
                    continue;
                if (attachmentTypesOfInterest != null && !attachmentTypesOfInterest.contains(ext))
                    continue;
                // else add it in the set of attachments to display for this doc.
                attachmentsOfInterest.add(b);
            }
        }
        docAttachmentMap.put(ed, attachmentsOfInterest);
        numAttachments += attachmentsOfInterest.size();
    }
    StringBuilder html = new StringBuilder();
    html.append("<div class=\"muse-doc\">\n");
    // this will be injected into the page directly, so fancybox js has access to it
    JsonArray attachmentDetails = new JsonArray();
    // a flag to detect if any of the attachments has normalization or cleanup info present; if so, an extra
    // column is shown in the list view and the front end is told through a JS variable.
    boolean isNormalized = false;
    // Number of duplicate attachments (we just count the number of messages in which one attachment appears - we assume that one attachment can not be duplicated in one message)
    int dupCount = 0;
    {
        int attachmentIndex = 0;
        // Maps an attachment's "filenameWithIndex" to (details of its first occurrence, number of messages it appears in).
        Map<String, Pair<JsonObject, Integer>> countMessagesMap = new LinkedHashMap<>();
        for (Map.Entry<Document, List<Blob>> entry : docAttachmentMap.entrySet()) {
            Document d = entry.getKey();
            for (Blob attachment : entry.getValue()) {
                JsonObject attachmentInfo = getAttachmentDetails(archive, attachment, d);
                // Keep attachmentInfo only if this attachment was not seen previously;
                // otherwise just increase the count of messages it was seen in.
                String attachmentKey = attachmentInfo.get("filenameWithIndex").toString();
                Pair<JsonObject, Integer> info = countMessagesMap.get(attachmentKey);
                if (info != null) {
                    dupCount++;
                } else {
                    info = new Pair<>(attachmentInfo, 0);
                    countMessagesMap.put(attachmentKey, info);
                }
                // increment count by 1.
                info.second = info.second + 1;
            }
        }
        // Now iterate over countMessagesMap to put JsonElement in the array.
        for (Pair<JsonObject, Integer> info : countMessagesMap.values()) {
            JsonObject attachmentInfo = info.first;
            attachmentInfo.addProperty("numMessages", info.second);
            // This index is used when selecting a specific tile that user clicks on the grid view.
            attachmentInfo.addProperty("index", attachmentIndex);
            attachmentIndex++;
            // no need to send it to the front end as it is only used to decide if two attachments were same or not.
            attachmentInfo.remove("filenameWithIndex");
            attachmentDetails.add(attachmentInfo);
            if (attachmentInfo.get("info") != null)
                // once set it can not be reset.
                isNormalized = true;
        }
    }
    // Heading plus list view / tile view buttons.
    {
        html.append("<div style=\"display:flex\">\n");
        String duplicatesNote = dupCount != 0 ? " (" + dupCount + " duplicates)" : "";
        String period = (isHackyYearPresent && year == 1960) ? "undated messages " : String.valueOf(year);
        // the font name is single-quoted: a double quote here would terminate the style attribute early
        html.append("<div style=\"text-align:left;width:87%;margin-top:10px;font-family:'Open Sans',sans-serif;color:#666;font-size:16px;\">" + numAttachments + " attachments" + duplicatesNote + " in " + period + "</div>\n");
        html.append("<div class=\"gallery_viewchangebar\" style=\"justify-content:flex-end\">\n");
        html.append("  <div title=\"List View\" class=\"listView\" onclick=\"showListView()\">\n" + "    <img class=\"fbox_toolbarimg\" id=\"listviewimg\" style=\"border-right:none;padding-left:10px;\" src=\"images/list_view.svg\"></img>\n" + "  </div>\n" + "  <div title=\"Grid View\"  class=\"tileView\" onclick=\"showTileView()\">\n" + "    <img class=\"fbox_toolbarimg\" id=\"tileviewimg\" style=\"height:28px;\" src=\"images/tile_view.svg\" ></img>\n" + "  </div>\n");
        html.append("</div>");
        html.append("</div>");
    }
    html.append("<hr/>\n<div class=\"attachments\">\n");
    // empty container for the tile view; this method emits no tile markup itself
    StringBuilder tilediv = new StringBuilder();
    tilediv.append("<div id=\"attachment-tiles\" style=\"display:none\">");
    tilediv.append("</div> <!-- closing of attachment-tile div-->\n");
    // add tilediv to page.
    html.append(tilediv);
    // create HTML for list view (table header only, empty body) and append to page
    {
        StringBuilder listdiv = new StringBuilder();
        listdiv.append("<div id=\"attachment-list\" style=\"display:none\">");
        listdiv.append("<table id=\"attachment-table\">\n");
        listdiv.append("<thead>\n");
        listdiv.append("<tr>\n");
        listdiv.append("<th>Subject</th>\n");
        listdiv.append("<th>Date</th>\n");
        listdiv.append("<th>Size</th>\n");
        listdiv.append("<th>Attachment name</th>\n");
        // add a field conditionally only if the information is also present for any attachment to be displayed to the user.
        if (isNormalized)
            listdiv.append("<th>More Information</th>\n");
        listdiv.append("</tr>\n");
        listdiv.append("</thead>\n");
        listdiv.append("<tbody>\n");
        listdiv.append("</tbody>\n");
        listdiv.append("</table>\n");
        listdiv.append("</div> <!-- closing of attachment--->\n");
        // add listdiv to page.
        html.append(listdiv);
    }
    // close html divs.
    // muse-doc-attachments
    html.append("\n</div>  <!-- .attachments -->\n");
    // .muse-doc
    html.append("\n</div>  <!-- .muse-doc -->\n");
    StringBuilder js = new StringBuilder();
    {
        if (isNormalized)
            // to pass this information to the front end we assign it to a JS variable.
            js.append("isNormalized=true\n");
        // note: no quotes should be present around attachmentDetails - it is simply a JS object in json notation
        js.append("var attachmentDetails=" + attachmentDetails.toString() + ";\n");
    }
    return new Pair<>(html.toString(), js.toString());
}
Also used : JsonObject(com.google.gson.JsonObject) Pair(edu.stanford.muse.util.Pair) Pattern(java.util.regex.Pattern) Blob(edu.stanford.muse.datacache.Blob) IOException(java.io.IOException) JsonArray(com.google.gson.JsonArray) JsonElement(com.google.gson.JsonElement)

Aggregations

Blob (edu.stanford.muse.datacache.Blob)29 Pair (edu.stanford.muse.util.Pair)7 Pattern (java.util.regex.Pattern)5 BlobStore (edu.stanford.muse.datacache.BlobStore)4 Field (org.apache.lucene.document.Field)4 JSONException (org.json.JSONException)3 LinkedHashMultimap (com.google.common.collect.LinkedHashMultimap)2 Multimap (com.google.common.collect.Multimap)2 Gson (com.google.gson.Gson)2 AddressBook (edu.stanford.muse.AddressBookManager.AddressBook)2 Contact (edu.stanford.muse.AddressBookManager.Contact)2 CorrespondentAuthorityMapper (edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper)2 AnnotationManager (edu.stanford.muse.AnnotationManager.AnnotationManager)2 Config (edu.stanford.muse.Config)2 Label (edu.stanford.muse.LabelManager.Label)2 LabelManager (edu.stanford.muse.LabelManager.LabelManager)2 edu.stanford.muse.email (edu.stanford.muse.email)2 NameInfo (edu.stanford.muse.ie.NameInfo)2 Document (edu.stanford.muse.index.Document)2 EmailDocument (edu.stanford.muse.index.EmailDocument)2