Search in sources :

Example 1 with NER

use of edu.stanford.muse.ner.NER in project epadd by ePADD.

The method fetchAndIndexEmails of the class JSPHelper.

// /* this version of fetchemails must have folders defined in request since there is no primary email address */
// public static Triple<Collection<EmailDocument>, AddressBook, BlobStore> fetchEmails(HttpServletRequest request, HttpSession session, boolean download) throws Exception
// {
// return fetchEmails (request, session, download, /* downloadattachments = */ false, false);
// }
// 
// /** fetches messages without downloading or attachments.
// * support default folder for primary email address */
// public static Triple<Collection<EmailDocument>, AddressBook, BlobStore> fetchEmails(HttpServletRequest request, HttpSession session, String primaryEmailAddress) throws Exception
// {
// return fetchEmails (request, session, false, false, false);
// }
// 
// public static boolean fetchEmailsDefaultFolders(HttpServletRequest request, HttpSession session, boolean downloadMessageText, boolean downloadAttachments) throws Exception
// {
// try {
// fetchEmails(request, session, downloadMessageText, downloadAttachments, true);
// } catch (Exception e) {
// return false;
// }
// return true;
// }
// 
// public static Triple<Collection<EmailDocument>, AddressBook, BlobStore> fetchEmails(HttpServletRequest request, HttpSession session, boolean downloadMessageText, boolean downloadAttachments, boolean useDefaultFolders)
// throws UnsupportedEncodingException, MessagingException, InterruptedException, IOException, JSONException, NoDefaultFolderException, CancelledException
// {
// return fetchEmails(request, session, downloadMessageText, downloadAttachments, useDefaultFolders, null);
// }
/**
 * Fetches email from the accounts configured in {@code m}, indexes it into {@code archive},
 * and, when message text is available, runs entity recognition (NER) over the archive.
 *
 * <p>Progress is published through the session attribute {@code "statusProvider"}, which is
 * replaced several times while this method runs. The archive is closed and re-opened for
 * reading once fetching/indexing has finished so that it is dumped before NER starts.
 *
 * <p>The request parameters {@code downloadMessageText} and {@code downloadAttachments}
 * (value {@code "true"}, case-insensitive) can switch the corresponding option on when the
 * matching argument is {@code false}; they never switch an option off. Turning attachments
 * on through the request parameter also turns message text on.
 *
 * @param archive             archive that receives the fetched messages and entity counts
 * @param m                   fetcher that performs the actual fetch and index work
 * @param request             current request; supplies the {@code folder} values, the
 *                            download overrides and the filter parameters
 * @param session             session in which the status provider is installed
 * @param downloadMessageText whether message text should be downloaded
 * @param downloadAttachments whether attachments should be downloaded
 * @param useDefaultFolders   passed through to the fetcher; per the original documentation,
 *                            use the fetcher's default folder when it has no explicit folders
 * @throws MessagingException       propagated from the operations invoked here
 * @throws InterruptedException     propagated from the operations invoked here
 * @throws IOException              propagated from the operations invoked here
 * @throws JSONException            propagated from the operations invoked here
 * @throws NoDefaultFolderException propagated from the operations invoked here
 * @throws CancelledException       propagated from the operations invoked here
 * @throws OutOfMemoryError         declared to signal that the operation may run out of memory
 */
public static void fetchAndIndexEmails(Archive archive, MuseEmailFetcher m, HttpServletRequest request, HttpSession session, boolean downloadMessageText, boolean downloadAttachments, boolean useDefaultFolders) throws MessagingException, InterruptedException, IOException, JSONException, NoDefaultFolderException, CancelledException, OutOfMemoryError {
    // first thing, set up a static status so the user doesn't see a stale status message
    session.setAttribute("statusProvider", new StaticStatusProvider("Starting up..."));
    checkContainer(request);
    String encoding = request.getCharacterEncoding();
    log.info("request parameter encoding is " + encoding);

    // the request may only upgrade the download options, never downgrade them
    if (!downloadMessageText && "true".equalsIgnoreCase(request.getParameter("downloadMessageText"))) {
        downloadMessageText = true;
        log.info("Downloading message text because advanced option was set");
    }
    if (!downloadAttachments && "true".equalsIgnoreCase(request.getParameter("downloadAttachments"))) {
        downloadAttachments = true;
        // text is needed for the attachment wall -- otherwise we can't break out from piclens
        // to browsing messages associated with a particular thumbnail
        downloadMessageText = true;
        log.info("Downloading attachments because advanced option was set");
    }

    String[] allFolders = decodeFolderParams(request.getParameterValues("folder"));

    Multimap<String, String> requestMap = convertRequestToMap(request);
    Filter filter = Filter.parseFilter(requestMap);

    // if required, forceEncoding (session attribute "forceEncoding") can go into the fetch config
    FetchConfig fc = new FetchConfig();
    fc.downloadMessages = downloadMessageText;
    fc.downloadAttachments = downloadAttachments;
    fc.filter = filter;

    archive.setBaseDir(getBaseDir(m, request));
    m.fetchAndIndexEmails(archive, allFolders, useDefaultFolders, fc, session);

    // make sure the archive is dumped at this point
    archive.close();
    archive.openForRead();

    // entity (IE) related tasks are only possible when the message text is available
    if (!downloadMessageText) {
        return;
    }

    NERModel nerModel = loadNerModelForIndexing(session);
    if (nerModel == null) {
        log.error("Could not load NER model from: " + SequenceModel.MODEL_FILENAME);
    } else {
        NER ner = new NER(archive, nerModel);
        // the NER object itself serves as the status provider while recognition runs
        session.setAttribute("statusProvider", ner);
        ner.recognizeArchive();
        // Instead of the count of all entities (ner.stats.counts), store only the count of
        // entities that pass the given threshold. This keeps the person-entity count shown on
        // browse-top.jsp consistent with the entities actually displayed under that threshold.
        // TODO make it more modular
        final double theta = 0.001;
        archive.collectionMetadata.entityCounts = Archive.getEntitiesCountMapModuloThersold(archive, theta);
        log.info(ner.stats);
    }

    // the count is logged as currently stored in the metadata; it is not recomputed here
    log.info("Number of potentially sensitive messages " + archive.collectionMetadata.numPotentiallySensitiveMessages);
    // No authority assignment is performed here: the former InternalAuthorityAssigner step
    // (guarded by the "muse.log" system property) was disabled.
}

/**
 * Converts the raw {@code folder} request values to UTF-8.
 *
 * <p>The conversion is first attempted with the flag set to {@code true}; if that fails with
 * an {@link UnsupportedEncodingException} it is retried with {@code false} to read whatever
 * folders can be read (presumably the flag controls whether conversion errors are thrown --
 * TODO confirm against {@code convertRequestParamsToUTF8}).
 *
 * @param rawFolders folder values as delivered by the request, may be {@code null}
 * @return the converted folder names; {@code null} if {@code rawFolders} is {@code null};
 *         the unconverted input if both attempts fail
 */
private static String[] decodeFolderParams(String[] rawFolders) {
    if (rawFolders == null) {
        return null;
    }
    try {
        return JSPHelper.convertRequestParamsToUTF8(rawFolders, true);
    } catch (UnsupportedEncodingException e) {
        // report the exception (with stack trace) and fall through to the lenient retry
        log.warn("Unsupported encoding exception: " + e, e);
    }
    try {
        return JSPHelper.convertRequestParamsToUTF8(rawFolders, false);
    } catch (UnsupportedEncodingException e1) {
        log.warn("Should not reach here! " + e1, e1);
        return rawFolders;
    }
}

/**
 * Loads the NER model used for entity recognition and announces the load through the
 * session's {@code "statusProvider"} attribute.
 *
 * <p>If the system property {@code muse.dummy.ner} is set (to any value), a
 * {@link DummyNERModel} is used; otherwise the model is loaded via
 * {@code SequenceModel.loadModelFromRules(SequenceModel.RULES_DIRNAME)}.
 *
 * @param session session in which the loading status is published
 * @return the loaded model, or {@code null} if loading failed with an {@link IOException}
 *         or the loader itself returned {@code null}
 */
private static NERModel loadNerModelForIndexing(HttpSession session) {
    String modelFile = SequenceModel.MODEL_FILENAME;
    session.setAttribute("statusProvider", new StaticStatusProvider("Loading NER sequence model from resource: " + modelFile + "..."));
    try {
        if (System.getProperty("muse.dummy.ner") != null) {
            log.info("Using dummy NER model, all CIC patterns will be treated as valid entities");
            return new DummyNERModel();
        }
        log.info("Loading NER sequence model from: " + modelFile + " ...");
        return SequenceModel.loadModelFromRules(SequenceModel.RULES_DIRNAME);
    } catch (IOException e) {
        Util.print_exception("Could not load the sequence model from: " + modelFile, e, log);
        return null;
    }
}
Also used : DummyNERModel(edu.stanford.muse.ner.model.DummyNERModel) NERModel(edu.stanford.muse.ner.model.NERModel) NER(edu.stanford.muse.ner.NER)

Aggregations

NER (edu.stanford.muse.ner.NER)1 DummyNERModel (edu.stanford.muse.ner.model.DummyNERModel)1 NERModel (edu.stanford.muse.ner.model.NERModel)1