Search in sources :

Example 16 with Contact

use of edu.stanford.muse.AddressBookManager.Contact in project epadd by ePADD.

the class IndexUtils method computeDetailedFacets.

/**
 * Computes the facets for the given documents, keeping the detailed facet items
 * (rather than just counts) for every facet.
 *
 * <p>Correspondent, direction, label, accession, annotation and attachment facets are
 * only produced when the archive has an address book. The folder facet does not depend
 * on the address book. Every facet's items are sorted by their natural order before
 * the map is returned.
 *
 * @param docs    documents to partition into facets
 * @param archive archive the documents belong to; its address book may be null
 * @return facet name -> sorted facet items, in insertion order of the facet names
 */
public static Map<String, Collection<DetailedFacetItem>> computeDetailedFacets(Collection<Document> docs, Archive archive) {
    Map<String, Collection<DetailedFacetItem>> facetMap = new LinkedHashMap<>();
    AddressBook addressBook = archive.addressBook;

    if (addressBook != null) {
        // people
        facetMap.put("correspondent", partitionDocsByPerson(docs, addressBook).values());

        // direction -- only shown when the docs fall into more than one direction
        Map<String, DetailedFacetItem> byDirection = partitionDocsByDirection(docs, addressBook);
        if (byDirection.size() > 1) {
            facetMap.put("direction", byDirection.values());
        }

        // (transfer / restriction / reviewed flags are no longer facets; they are handled by labels)

        // restriction labels, then general labels
        facetMap.put("Restriction Labels", partitionDocsByLabelTypes(docs, archive, LabelManager.LabType.RESTRICTION).values());
        facetMap.put("General Labels", partitionDocsByLabelTypes(docs, archive, LabelManager.LabType.GENERAL).values());

        // accession IDs -- only in modes other than appraisal
        if (!ModeConfig.isAppraisalMode()) {
            facetMap.put("Accessions", partitionDocsByAccessionID(docs, archive).values());
        }

        facetMap.put("Annotations", partitionDocsByAnnotationPresence(docs, archive).values());

        // attachments -- not in public mode
        if (!ModeConfig.isPublicMode()) {
            facetMap.put("attachment type", partitionDocsByAttachmentType(docs).values());
        }
    }

    // folders -- not in public mode, and only when there is at least one folder
    if (!ModeConfig.isPublicMode()) {
        Map<String, DetailedFacetItem> byFolder = partitionDocsByFolder(docs);
        if (!byFolder.isEmpty()) {
            facetMap.put("folders", byFolder.values());
        }
    }

    // sort so that in each topic, the heaviest facets are first
    for (Map.Entry<String, Collection<DetailedFacetItem>> facet : facetMap.entrySet()) {
        List<DetailedFacetItem> sorted = new ArrayList<>(facet.getValue());
        Collections.sort(sorted);
        facet.setValue(sorted);
    }
    return facetMap;
}
Also used : AddressBook(edu.stanford.muse.AddressBookManager.AddressBook) Contact(edu.stanford.muse.AddressBookManager.Contact)

Example 17 with Contact

use of edu.stanford.muse.AddressBookManager.Contact in project epadd by ePADD.

the class Lens method detailsForTerm.

/**
 * Gets details from the index for the given term and packages them as JSON.
 *
 * <p>The term is looked up in two places: the address book (header hits) and the
 * full-text index (content hits, as a quoted phrase query). The resulting JSON carries
 * the keys {@code text}, {@code pageScore}, {@code indexScore}, {@code nMessages},
 * {@code people} (at most 5 entries), {@code url} and {@code messages} (at most 5 teasers).
 *
 * @param term      term to look up; terms of 2 characters or fewer are ignored
 * @param pageScore score passed through unchanged into the JSON
 * @param archive   archive whose index is queried
 * @param ab        address book used for the name lookup and for scoring people
 * @param baseURL   prefix for the browse URL placed in the JSON
 * @param allDocs   documents considered for the address book lookup
 * @return the JSON details, or null if the term has 2 characters or fewer
 * @throws JSONException declared by the method; presumably raised by the JSON calls
 * @throws IOException   declared by the method; presumably raised by the index query
 */
public static JSONObject detailsForTerm(String term, float pageScore, Archive archive, AddressBook ab, String baseURL, Collection<EmailDocument> allDocs) throws JSONException, IOException {
    if (term.length() <= 2)
        return null;
    term = JSPHelper.convertRequestParamToUTF8(term);
    JSONObject json = new JSONObject();
    json.put("text", term);
    json.put("pageScore", pageScore);
    int NAME_IN_ADDRESS_BOOK_WEIGHT = 100;
    // look up term in 2 places -- AB and in the index
    List<EmailDocument> docsForNameInAddressBook = (List) IndexUtils.selectDocsByPersonsAsList(ab, allDocs, new String[] { term });
    List<EmailDocument> docsForTerm = (List) new ArrayList<>(archive.docsForQuery("\"" + term + "\"", -1, Indexer.QueryType.FULL));
    // weigh any docs for name in addressbook hugely more!
    double termScore = docsForNameInAddressBook.size() * NAME_IN_ADDRESS_BOOK_WEIGHT + docsForTerm.size();
    json.put("indexScore", termScore);
    // union of both hit lists, address book hits first, without duplicates
    Set<EmailDocument> finalDocSet = new LinkedHashSet<>();
    finalDocSet.addAll(docsForNameInAddressBook);
    finalDocSet.addAll(docsForTerm);
    List<EmailDocument> finalDocList = new ArrayList<>(finalDocSet);
    json.put("nMessages", finalDocList.size());
    // score people: each message spreads a total weight of 1 evenly over its participants
    Map<Contact, Float> peopleScores = new LinkedHashMap<>();
    for (EmailDocument ed : finalDocSet) {
        Collection<String> addrs = ed.getParticipatingAddrsExcept(ab.getOwnAddrs());
        if (addrs.isEmpty())
            continue;
        // weight = 1/size, identical for every address of this message
        float weight = 1.0f / addrs.size();
        for (String s : addrs) {
            if ("user".equals(s))
                continue;
            // the contact may be null when the address is not in the address book;
            // such entries are kept and rendered with an empty display name below
            Contact c = ab.lookupByEmail(s);
            peopleScores.merge(c, weight, Float::sum);
        }
    }
    // add the top people
    int MAX_PEOPLE = 5;
    List<Pair<Contact, Float>> pairs = Util.sortMapByValue(peopleScores);
    JSONArray people = new JSONArray();
    Contact own = ab.getContactForSelf();
    int count = 0;
    for (Pair<Contact, Float> p : pairs) {
        // stop once MAX_PEOPLE entries have been emitted (">" here used to let a 6th one through)
        if (count >= MAX_PEOPLE)
            break;
        Contact c = p.getFirst();
        if (c == own)
            // ignore own name
            continue;
        JSONObject person = new JSONObject();
        String displayName = c == null ? "" : c.pickBestName();
        person.put("person", displayName);
        person.put("score", p.getSecond());
        people.put(count, person);
        count++;
    }
    json.put("people", people);
    if (finalDocList.size() > 0 && log.isDebugEnabled())
        log.debug("Term: " + term + " content hits: " + docsForTerm.size() + " header hits: " + docsForNameInAddressBook.size() + " total: " + finalDocList.size());
    String url = baseURL + "/browse?term=\"" + term + "\"";
    json.put("url", url);
    JSONArray messages = new JSONArray();
    // put up to 5 teasers in the json response
    int N_TEASERS = 5;
    for (int i = 0; i < finalDocList.size() && i < N_TEASERS; i++) {
        JSONObject message = finalDocList.get(i).toJSON(0);
        messages.put(i, message);
    }
    json.put("messages", messages);
    return json;
}
Also used : JSONArray(org.json.JSONArray) Contact(edu.stanford.muse.AddressBookManager.Contact) JSONObject(org.json.JSONObject) Pair(edu.stanford.muse.util.Pair)

Example 18 with Contact

use of edu.stanford.muse.AddressBookManager.Contact in project epadd by ePADD.

the class EmailUtils method getContactsForMessage.

/**
 * Returns the contacts for all to/from/cc/bcc addresses of the message.
 *
 * <p>Each address is first resolved by its email address. Only when that lookup fails
 * is the address's personal name looked up instead, in which case every contact found
 * for the name is added.
 *
 * @param ab address book used for the lookups
 * @param ed message whose header addresses are resolved
 * @return the contacts found, in order of first appearance (to, from, cc, bcc)
 */
public static Set<Contact> getContactsForMessage(AddressBook ab, EmailDocument ed) {
    // gather the addresses of every populated header field, de-duplicated, in to/from/cc/bcc order
    Set<InternetAddress> addresses = new LinkedHashSet<>();
    if (!Util.nullOrEmpty(ed.to))
        addresses.addAll((List) Arrays.asList(ed.to));
    if (!Util.nullOrEmpty(ed.from))
        addresses.addAll((List) Arrays.asList(ed.from));
    if (!Util.nullOrEmpty(ed.cc))
        addresses.addAll((List) Arrays.asList(ed.cc));
    if (!Util.nullOrEmpty(ed.bcc))
        addresses.addAll((List) Arrays.asList(ed.bcc));

    Set<Contact> resolved = new LinkedHashSet<>();
    for (InternetAddress address : addresses) {
        Contact byEmail = ab.lookupByEmail(address.getAddress());
        if (byEmail != null) {
            resolved.add(byEmail);
            continue;
        }
        // fall back to the name only if the email lookup failed -- hopefully this is rare
        log.debug("Warning: email lookup failed for " + address);
        Collection<Contact> byName = ab.lookupByName(address.getPersonal());
        if (!Util.nullOrEmpty(byName))
            resolved.addAll(byName);
    }
    return resolved;
}
Also used : InternetAddress(javax.mail.internet.InternetAddress) Contact(edu.stanford.muse.AddressBookManager.Contact)

Example 19 with Contact

use of edu.stanford.muse.AddressBookManager.Contact in project epadd by ePADD.

the class CrossCollectionSearch method initialize.

/**
 * initializes lookup structures (entity infos and ctokenToInfos) for cross collection search
 * reads all archives available in the base dir.
 * should be synchronized so there's no chance of doing it multiple times at the same time.
 *
 * Every sub-directory of baseDir that contains the default session file is read as an archive.
 * For each archive a map of canonicalized entity -> EntityInfo is built from the correspondents
 * and from the entity book summary; each EntityInfo is then registered in cTokenToInfos under
 * every distinct token of its canonicalized display name, and the canonicalized name is added
 * to allCEntities. A failure while loading one directory is logged and does not stop the loop.
 *
 * @param baseDir directory whose sub-directories are scanned for archives
 */
private static synchronized void initialize(String baseDir) {
    // this is created only once in one run. if it has already been created, reuse it.
    // in the future, this may be read from a serialized file, etc.
    // NOTE(review): the assignment below replaces cTokenToInfos unconditionally; any
    // "already created" check is not visible in this method -- confirm the caller does it.
    cTokenToInfos = LinkedHashMultimap.create();
    File[] files = new File(baseDir).listFiles();
    if (files == null) {
        log.warn("Trying to initialize cross collection search from an invalid directory: " + baseDir);
        return;
    }
    // used only in the per-archive log message below
    int archiveNum = 0;
    for (File f : files) {
        if (!f.isDirectory())
            continue;
        try {
            // path of the default session file inside the archive's bag data folder
            String archiveFile = f.getAbsolutePath() + File.separator + Archive.BAG_DATA_FOLDER + File.separator + Archive.SESSIONS_SUBDIR + File.separator + "default" + SimpleSessions.getSessionSuffix();
            if (!new File(archiveFile).exists()) {
                log.warn("Unable to find archive file" + archiveFile + ".. Serious error");
                continue;
            }
            // Assumption is that this feature is present only in discovery mode. In future when we want to add it to processing, we need proper care.
            Archive archive = ArchiveReaderWriter.readArchiveIfPresent(f.getAbsolutePath(), ModeConfig.Mode.DISCOVERY);
            if (archive == null) {
                log.warn("failed to read archive from " + f.getAbsolutePath());
                continue;
            }
            // NOTE(review): the two info messages below report the same path; looks like a leftover duplicate.
            log.info("Loaded archive from " + f.getAbsolutePath());
            log.info("Loaded archive metadata from " + f.getAbsolutePath());
            // process all docs in this archive to set up centityToInfo map
            String archiveID = ArchiveReaderWriter.getArchiveIDForArchive(archive);
            // canonicalized entity -> info, for this archive only
            Map<String, EntityInfo> centityToInfo = new LinkedHashMap<>();
            {
                // get all contacts from the addressbook
                // each element is (canonicalized name or email, ((first date, last date), message count))
                Set<Pair<String, Pair<Pair<Date, Date>, Integer>>> correspondentEntities = new LinkedHashSet<>();
                {
                    Map<Contact, DetailedFacetItem> res = IndexUtils.partitionDocsByPerson(archive.getAllDocs(), archive.getAddressBook());
                    res.entrySet().forEach(s -> {
                        // get contactname
                        Contact c = s.getKey();
                        // get duration (first and last doc where this contact was used)
                        Set<EmailDocument> edocs = s.getValue().docs.stream().map(t -> (EmailDocument) t).collect(Collectors.toSet());
                        Pair<Date, Date> duration = EmailUtils.getFirstLast(edocs);
                        // fall back to the collection-wide dates for whatever part of the duration is missing
                        if (duration == null) {
                            duration = new Pair<>(archive.collectionMetadata.firstDate, archive.collectionMetadata.lastDate);
                        }
                        if (duration.first == null)
                            duration.first = archive.collectionMetadata.firstDate;
                        if (duration.second == null)
                            duration.second = archive.collectionMetadata.lastDate;
                        // get number of messages where this was used.
                        Integer count = s.getValue().docs.size();
                        // one entry per non-empty name of the contact
                        if (c.getNames() != null) {
                            // effectively-final copy, because duration is reassigned above and cannot be captured by the lambda
                            Pair<Date, Date> finalDuration = duration;
                            c.getNames().forEach(w -> {
                                if (!Util.nullOrEmpty(w) && finalDuration != null && count != null)
                                    correspondentEntities.add(new Pair(canonicalize(w), new Pair(finalDuration, count)));
                            });
                        }
                        // one entry per non-empty email address of the contact
                        if (c.getEmails() != null) {
                            Pair<Date, Date> finalDuration1 = duration;
                            c.getEmails().forEach(w -> {
                                if (!Util.nullOrEmpty(w) && finalDuration1 != null && count != null)
                                    correspondentEntities.add(new Pair(canonicalize(w), new Pair(finalDuration1, count)));
                            });
                        }
                    });
                }
                // get all entities from entitybookmanager
                // (the empty set assigned here is immediately replaced inside the block below)
                Set<Pair<String, Pair<Pair<Date, Date>, Integer>>> entitiessummary = new LinkedHashSet<>();
                {
                    entitiessummary = archive.getEntityBookManager().getAllEntitiesSummary();
                    // filter out any null or empty strings (just in case)
                    // don't canonicalize right away because we need to keep the original form of the name
                    entitiessummary = entitiessummary.stream().filter(s -> !Util.nullOrEmpty(s.first)).collect(Collectors.toSet());
                }
                // if an entity is present as a person entity as well as in correspondent then consider the count of the person entity as the final count.  Therefore start with
                // processing of correspondent entities.
                correspondentEntities.forEach(entity -> {
                    String centity = canonicalize(entity.first);
                    EntityInfo ei = centityToInfo.get(centity);
                    if (ei == null) {
                        ei = new EntityInfo();
                        ei.archiveID = archiveID;
                        ei.displayName = entity.first;
                        centityToInfo.put(centity, ei);
                    }
                    ei.isCorrespondent = true;
                    ei.firstDate = entity.second.first.first;
                    ei.lastDate = entity.second.first.second;
                    ei.count = entity.second.second;
                });
                // Now process entities (except correspondents).
                // An entity already registered as a correspondent keeps isCorrespondent and displayName,
                // but its dates and count are overwritten with the entity book values.
                entitiessummary.forEach(entity -> {
                    String centity = canonicalize(entity.first);
                    EntityInfo ei = centityToInfo.get(centity);
                    if (ei == null) {
                        ei = new EntityInfo();
                        ei.archiveID = archiveID;
                        ei.displayName = entity.first;
                        centityToInfo.put(centity, ei);
                    }
                    // ei.isCorrespondent=true;
                    ei.firstDate = entity.second.first.first;
                    ei.lastDate = entity.second.first.second;
                    ei.count = entity.second.second;
                });
            }
            log.info("Archive # " + archiveNum + " read " + centityToInfo.size() + " entities");
            // now set up this map as a token map
            for (EntityInfo ei : centityToInfo.values()) {
                String entity = ei.displayName;
                String centity = canonicalize(entity);
                allCEntities.add(centity);
                // consider a set of tokens because we don't want repeats
                Set<String> ctokens = new LinkedHashSet<>(Util.tokenize(centity));
                for (String ctoken : ctokens) cTokenToInfos.put(ctoken, ei);
            }
        } catch (Exception e) {
            // a broken archive is reported and skipped; the remaining directories are still processed
            Util.print_exception("Error loading archive in directory " + f.getAbsolutePath(), e, log);
        }
        archiveNum++;
    }
}
Also used : Config(edu.stanford.muse.Config) java.util(java.util) edu.stanford.muse.index(edu.stanford.muse.index) AddressBook(edu.stanford.muse.AddressBookManager.AddressBook) Util(edu.stanford.muse.util.Util) Multimap(com.google.common.collect.Multimap) Collectors(java.util.stream.Collectors) File(java.io.File) MappedEntity(edu.stanford.muse.ie.variants.MappedEntity) DetailedFacetItem(edu.stanford.muse.util.DetailedFacetItem) Contact(edu.stanford.muse.AddressBookManager.Contact) Pair(edu.stanford.muse.util.Pair) Logger(org.apache.logging.log4j.Logger) EntityBook(edu.stanford.muse.ie.variants.EntityBook) EmailUtils(edu.stanford.muse.util.EmailUtils) SimpleSessions(edu.stanford.muse.webapp.SimpleSessions) ModeConfig(edu.stanford.muse.webapp.ModeConfig) LogManager(org.apache.logging.log4j.LogManager) LinkedHashMultimap(com.google.common.collect.LinkedHashMultimap) Contact(edu.stanford.muse.AddressBookManager.Contact) DetailedFacetItem(edu.stanford.muse.util.DetailedFacetItem) File(java.io.File) Pair(edu.stanford.muse.util.Pair)

Example 20 with Contact

use of edu.stanford.muse.AddressBookManager.Contact in project epadd by ePADD.

the class NameTypes method computeInfo.

/**
 * Updates the {@code NameInfo} of every name found in the given documents.
 *
 * <p>For each document, and for each name the archive reports for it, the matching
 * entry of {@code nameMap} (keyed by the trimmed, lower-cased name with spaces replaced
 * by underscores) is updated with:
 * <ul>
 *   <li>a count per sentiment whose document collection contains the document,</li>
 *   <li>a count per contact resolved from the document's to/cc/bcc addresses
 *       (the contact key is null when the address is not in the address book),</li>
 *   <li>the earliest and latest document date seen for the name.</li>
 * </ul>
 * Names without an entry in {@code nameMap} are logged and skipped. Documents without a
 * date leave the first/last dates untouched.
 *
 * @param nameMap canonical name -> info; its values are modified in place
 * @param allDocs documents to process; if null, all documents of the archive are used
 * @param archive archive supplying names, sentiments and the address book
 * @param lex     lexicon passed to the archive to obtain the sentiment map
 * @throws IOException declared by the method; presumably raised by the archive lookups
 */
public static void computeInfo(Map<String, NameInfo> nameMap, Collection<EmailDocument> allDocs, Archive archive, Lexicon lex) throws IOException {
    // assign types to all the names
    if (allDocs == null)
        allDocs = (List) archive.getAllDocs();
    // compute name -> nameInfo
    Map<String, Collection<Document>> sentimentToDocs = archive.getSentimentMap(lex, true);
    for (EmailDocument ed : allDocs) {
        String id = ed.getUniqueId();
        List<String> names = archive.getNamesForDocId(id, Indexer.QueryType.FULL);
        List<Address> mentionedAddresses = ed.getToCCBCC();
        // sentiments whose document collection contains this document
        Set<String> sentimentsForDoc = new LinkedHashSet<>();
        for (Map.Entry<String, Collection<Document>> sentimentDocs : sentimentToDocs.entrySet()) {
            if (sentimentDocs.getValue().contains(ed))
                sentimentsForDoc.add(sentimentDocs.getKey());
        }
        // the date is the same for every name of this document
        Date documentDate = ed.getDate();
        for (String name : names) {
            // canonical title
            String cTitle = name.trim().toLowerCase().replaceAll(" ", "_");
            NameInfo I = nameMap.get(cTitle);
            if (I == null) {
                log.info("Warning: null info for name: " + name);
                continue;
            }
            // Map sentiment to its prominence in document.
            if (I.sentimentCatToCount == null)
                I.sentimentCatToCount = new LinkedHashMap<>();
            for (String sentiment : sentimentsForDoc)
                I.sentimentCatToCount.merge(sentiment, 1, Integer::sum);
            I.sentimentCatToCount = Util.reorderMapByValue(I.sentimentCatToCount);
            // obtain list of contacts to whom email is being sent.
            for (Address adr : mentionedAddresses) {
                InternetAddress emailadr = (InternetAddress) adr;
                String address_string = emailadr.getAddress();
                // may be null if the address is unknown; null is then counted as a key of its own
                Contact associatedcontact = archive.addressBook.lookupByEmail(address_string);
                if (I.peopleToCount == null)
                    I.peopleToCount = new LinkedHashMap<>();
                I.peopleToCount.merge(associatedcontact, 1, Integer::sum);
            }
            if (I.peopleToCount != null)
                I.peopleToCount = Util.reorderMapByValue(I.peopleToCount);
            // determine start and end dates of the term.
            // Date.after/before throw on a null argument, so a dateless document must not take part.
            if (documentDate != null) {
                if (I.firstDate == null || I.firstDate.after(documentDate))
                    I.firstDate = documentDate;
                if (I.lastDate == null || I.lastDate.before(documentDate))
                    I.lastDate = documentDate;
            }
        }
    }
}
Also used : InternetAddress(javax.mail.internet.InternetAddress) Address(javax.mail.Address) InternetAddress(javax.mail.internet.InternetAddress) Contact(edu.stanford.muse.AddressBookManager.Contact)

Aggregations

Contact (edu.stanford.muse.AddressBookManager.Contact)27 AddressBook (edu.stanford.muse.AddressBookManager.AddressBook)10 InternetAddress (javax.mail.internet.InternetAddress)7 Address (javax.mail.Address)6 JSONObject (org.json.JSONObject)5 Blob (edu.stanford.muse.datacache.Blob)4 Pair (edu.stanford.muse.util.Pair)4 Util (edu.stanford.muse.util.Util)4 java.util (java.util)4 Collectors (java.util.stream.Collectors)4 JSONArray (org.json.JSONArray)4 LinkedHashMultimap (com.google.common.collect.LinkedHashMultimap)3 Multimap (com.google.common.collect.Multimap)3 EmailUtils (edu.stanford.muse.util.EmailUtils)3 LogManager (org.apache.logging.log4j.LogManager)3 Logger (org.apache.logging.log4j.Logger)3 MailingList (edu.stanford.muse.AddressBookManager.MailingList)2 AnnotationManager (edu.stanford.muse.AnnotationManager.AnnotationManager)2 Config (edu.stanford.muse.Config)2 BlobStore (edu.stanford.muse.datacache.BlobStore)2