Search in sources :

Example 1 with NameInfo

use of edu.stanford.muse.ie.NameInfo in project epadd by ePADD.

The method getHitsQuick of the class Lens.

/**
 * Looks up the given names in the message content index and returns one JSON object per
 * usable name. lensPrefs holds the user's term preferences and is handed to scoreHits
 * together with the collected hits.
 *
 * Each JSON object carries: "text" (the sanitized term), "pageScore" (the score supplied
 * with the name), "type" (only when archive.nameLookup yields a type other than "notype"),
 * "indexScore", "nMessages" and "url" (a browse link for the term).
 *
 * Names are skipped when they are null or when they are 2 characters or shorter, either as
 * given or after sanitization. The address book lookup is currently disabled, so the
 * address book contribution to the score is always 0.
 *
 * @param names     (name, page score) pairs to look up
 * @param lensPrefs the user's term preferences, passed on to scoreHits
 * @param archive   archive whose index is queried; if it has no indexer an empty list is returned
 * @param baseURL   prefix used when building each hit's browse URL
 * @param allDocs   not used while the address book lookup is disabled
 * @return the hits after being passed through scoreHits; an empty list if the archive has no indexer
 * @throws JSONException if a JSON object cannot be populated
 * @throws IOException   propagated from the underlying lookups
 */
public static List<JSONObject> getHitsQuick(List<Pair<String, Float>> names, LensPrefs lensPrefs, Archive archive, String baseURL, Collection<EmailDocument> allDocs) throws JSONException, IOException {
    List<JSONObject> list = new ArrayList<>();
    String archiveID = ArchiveReaderWriter.getArchiveIDForArchive(archive);
    if (archive.indexer == null)
        return list;
    // weight applied to address book hits; kept so the scoring formula is ready when the AB lookup is re-enabled
    final int NAME_IN_ADDRESS_BOOK_WEIGHT = 100;
    for (Pair<String, Float> pair : names) {
        if (pair == null)
            continue;
        String term = pair.getFirst();
        // cheap pre-filter on the raw term; null terms cannot be looked up at all
        if (term == null || term.length() <= 2)
            continue;
        term = JSPHelper.convertRequestParamToUTF8(term);
        // Drop line breaks, then everything except letters, decimal digits, whitespace and periods,
        // and finally collapse runs of whitespace into a single space
        term = term.replaceAll("[\\r\\n]", "");
        term = term.replaceAll("[^\\p{L}\\p{Nd}\\s\\.]", "");
        term = term.replaceAll("\\s+", " ");
        // Sanitization can shrink a term (e.g. "!!!" becomes ""); re-apply the minimum-length rule
        // so that empty or near-empty phrases are never sent to the index
        if (term.trim().length() <= 2)
            continue;
        float pageScore = pair.getSecond();
        JSONObject json = new JSONObject();
        json.put("text", term);
        json.put("pageScore", pageScore);
        NameInfo ni = archive.nameLookup(term);
        if (ni != null && ni.type != null && !"notype".equals(ni.type))
            json.put("type", ni.type);
        // look up term in 2 places -- AB and in the index
        // temporarily disabled AB - sgh. IndexUtils.selectDocsByPersons(ab, allDocs, new String[]{term}).size();
        int hitsInAddressBook = 0;
        // To check: does this include subject line also...
        int hitsInMessageContent = archive.countHitsForQuery("\"" + term + "\"");
        // weigh any docs for name in addressbook hugely more!
        double termScore = hitsInAddressBook * NAME_IN_ADDRESS_BOOK_WEIGHT + hitsInMessageContent;
        json.put("indexScore", termScore);
        int totalHits = hitsInAddressBook + hitsInMessageContent;
        // this is an over-estimate since the same message might match both in addressbook and in body. it is used only for scoring and should NEVER be shown to the user. getTermHitDetails will get the accurate count
        json.put("nMessages", totalHits);
        log.info(term + ": " + hitsInAddressBook + " in address book, " + hitsInMessageContent + " in messages");
        // NOTE(review): term is placed in the query string without URL-encoding (it may contain spaces
        // and is wrapped in double quotes) -- confirm the consumer of "url" tolerates or encodes this
        String url = baseURL + "/browse?archiveID=" + archiveID + "&adv-search=1&termBody=on&termSubject=on&termAttachments=on&termOriginalBody=on&term=\"" + term + "\"";
        json.put("url", url);
        list.add(json);
    }
    log.info(list.size() + " terms hit");
    list = scoreHits(list, lensPrefs);
    return list;
}
Also used : NameInfo(edu.stanford.muse.ie.NameInfo) JSONObject(org.json.JSONObject) AddressBook(edu.stanford.muse.AddressBookManager.AddressBook)

Aggregations

AddressBook (edu.stanford.muse.AddressBookManager.AddressBook): 1, NameInfo (edu.stanford.muse.ie.NameInfo): 1, JSONObject (org.json.JSONObject): 1