Example 1 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class IndexUtils method partitionDocsByAnnotationPresence.

 * Partition documents by the presence/absence of annotation text
/**
 * Builds facet items that split {@code docs} by whether the archive's annotation manager
 * returns a non-empty annotation for each document's unique id.
 *
 * @param docs    documents to partition
 * @param archive archive whose {@code AnnotationManager} is consulted
 * @return insertion-ordered map that may contain a "notannotated" and/or an "annotated" entry;
 *         an entry is only added when at least one document falls into that category
 */
private static Map<String, DetailedFacetItem> partitionDocsByAnnotationPresence(Collection<? extends Document> docs, Archive archive) {
    // NOTE(review): tagToDocs is neither read nor written in the visible code -- looks like a leftover; confirm and remove.
    Map<String, Set<Document>> tagToDocs = new LinkedHashMap<>();
    Map<String, DetailedFacetItem> result = new LinkedHashMap<>();
    Set<Document> annotatedDocs = new LinkedHashSet<>();
    Set<Document> unannotatedDocs = new LinkedHashSet<>();
    AnnotationManager annotationManager = archive.getAnnotationManager();
    for (Document d : docs) {
        // NOTE(review): the statement guarded by this check, and the closing brace of the loop, are not
        // visible in this excerpt. Presumably d goes into annotatedDocs here and into unannotatedDocs
        // otherwise -- confirm against the full source.
        if (!Util.nullOrEmpty(annotationManager.getAnnotation(d.getUniqueId())))
    // A facet item is created only for a non-empty category, then every document of that category is added to it.
    // NOTE(review): the last two constructor arguments look like the query parameter name/value
    // that selects this facet -- confirm against DetailedFacetItem.
    if (unannotatedDocs.size() > 0) {
        result.put("notannotated", new DetailedFacetItem("Not annotated", "Documents with no annotation", "isannotated", "false"));
        unannotatedDocs.forEach(doc -> result.get("notannotated").addDoc(doc));
    if (annotatedDocs.size() > 0) {
        result.put("annotated", new DetailedFacetItem("Annotated", "Documents with annotation", "isannotated", "true"));
        annotatedDocs.forEach(doc -> result.get("annotated").addDoc(doc));
    return result;
Example 2 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SearchResult method filterForAnnotationText.

/* *************************ONLY DOCUMENT SPECIFIC FILTERS*************************************** */
 * returns only the docs from amongst the given ones that match the query specification for annotation text.
 * @param inputSet The input search result object on which this filtering needs to be done.
 * @return Another SearchResult object containing filtered messages only.
/**
 * Restricts {@code inputSet.matchedDocs} to the documents whose annotation matches the
 * "annotation" query parameter. When that parameter is null or empty nothing is filtered.
 *
 * The filtering is done in place: {@code inputSet.matchedDocs} is reassigned and the same
 * {@code inputSet} instance is returned.
 *
 * @param inputSet search result to filter; its {@code queryParams} supply the "annotation" parameter
 * @return {@code inputSet} itself, after filtering
 */
private static SearchResult filterForAnnotationText(SearchResult inputSet) {
    String annotationStr = JSPHelper.getParam(inputSet.queryParams, "annotation");
    if (!Util.nullOrEmpty(annotationStr)) {
        // NOTE(review): presumably splitFieldForOr yields the individual OR-terms already lower-cased,
        // since they are compared against a lower-cased annotation below -- confirm.
        Set<String> annotations = Util.splitFieldForOr(annotationStr);
        AnnotationManager annotationManager = inputSet.getArchive().getAnnotationManager();
        // NOTE(review): the two-argument Collectors.toMap makes no promise about the returned map type,
        // so any iteration order of the previous matchedDocs map may be lost -- confirm callers do not rely on it.
        inputSet.matchedDocs = inputSet.matchedDocs.entrySet().stream().filter(entry -> {
            // Every key is cast to EmailDocument; a key of any other type would fail with ClassCastException.
            EmailDocument edoc = (EmailDocument) entry.getKey();
            String comment = annotationManager.getAnnotation(edoc.getUniqueId());
            if (!Util.nullOrEmpty(comment)) {
                // Whole-string match: the complete lower-cased annotation must equal one of the terms.
                comment = comment.toLowerCase();
                return annotations.contains(comment);
            } else
                // Documents without an annotation never match.
                return false;
        }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    return inputSet;
Example 3 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SimpleSessions method loadSessionAsMap.

 * loads session from the given filename, and returns the map of loaded
 * attributes.
 * if readOnly is false, caller MUST make sure to call packIndex.
 * baseDir is Indexer's baseDir (path before "indexes/")
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 * Change as on Nov 2017-
 * Earlier the whole archive was serialized and deserialized as one big entity. Now it is broken into
 * four main parts, Addressbook, entitybook, correspondentAuthorityMapper and the rest of the object
 * We save all these four components separately in saveArchive. Therefore while reading, we need to read
 * all those separately from appropriate files.
/**
 * Reads a gzip-compressed Java object stream of key/value records from {@code filename} into a map
 * and, when the map contains an "archive" entry, loads the archive's separately stored components
 * from files under {@code baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR}.
 *
 * NOTE(review): several statements in this excerpt are visibly truncated (string fragments without a
 * call, an {@code if} and a {@code try} without bodies, missing closing braces). The comments below
 * describe only what is visible; confirm against the full source before relying on them.
 *
 * NOTE(review): this uses native Java deserialization ({@code ObjectInputStream.readObject}); the
 * session file should come from a trusted location only -- confirm.
 *
 * @param filename path of the serialized session file
 * @param baseDir  base directory under which the sessions sub-directory is resolved; also passed to
 *                 {@code readCollectionMetadata} and {@code archive.postDeserialized}
 * @param readOnly forwarded unchanged to {@code archive.postDeserialized}
 * @return insertion-ordered map of the loaded keys; "archive" is mapped to null when any of the
 *         address book, entity book or correspondent authority files is missing, and "emailDocs"
 *         is added when the archive was loaded
 * @throws IOException declared by the signature
 */
public static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly) throws IOException {"Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
    ObjectInputStream ois = null;
    // keep reading till eof exception
    Map<String, Object> result = new LinkedHashMap<>();
    try {
        ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(filename)));
        // Records are read as alternating objects: a String key followed by its value.
        while (true) {
            String key = (String) ois.readObject();
  "loading key: " + key);
            // A value with an incompatible class version or an unknown class is logged and skipped;
            // the key is simply left unset.
            try {
                Object value = ois.readObject();
                // NOTE(review): the statement guarded by this null check is not visible in this excerpt -- confirm.
                if (value == null)
                result.put(key, value);
            } catch (InvalidClassException ice) {
                log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
            } catch (ClassNotFoundException cnfe) {
                log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
    } catch (EOFException eof) {"end of session file reached");
    // Any other failure is only logged as a warning; processing continues with whatever was read.
    } catch (Exception e) {
        log.warn("Warning unable to load session: " + Util.stackTrace(e));
    // NOTE(review): the body of the try below is not visible; presumably it closes ois -- confirm.
    if (ois != null)
        try {
        } catch (Exception e) {
            Util.print_exception(e, log);
    // need to set up sentiments explicitly -- now no need since lexicon is part of the session"Memory status: " + Util.getMemoryStats());
    Archive archive = (Archive) result.get("archive");
    // no groups in public mode
    if (archive != null) {
				Read other three modules of Archive object which were set as transient and hence did not serialize.
        // file path names of addressbook, entitybook and correspondentAuthorityMapper data.
        String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
        String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
        String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
        // above three files are not present. In that case start afresh with importing the email-archive again in processing mode.
        // Only the address book, entity book and authority mapper files are required; when any of them
        // is missing the "archive" key is mapped to null and the method returns early.
        if (!(new File(addressBookPath).exists()) || !(new File(entityBookPath).exists()) || !(new File(cAuthorityPath).exists())) {
            result.put("archive", null);
            return result;
        // ///////////////AddressBook////////////////////////////////////////////
        // NOTE(review): neither BufferedReader opened in this method is closed in the visible code, and
        // FileReader(String) uses the platform default charset on Java versions before 18 -- confirm and
        // consider try-with-resources with an explicit charset.
        BufferedReader br = new BufferedReader(new FileReader(addressBookPath));
        AddressBook ab = AddressBook.readObjectFromStream(br);
        archive.addressBook = ab;
        // //////////////EntityBook/////////////////////////////////////
        br = new BufferedReader(new FileReader(entityBookPath));
        // NOTE(review): eb is not attached to the archive in the visible lines -- the assignment is presumably missing from this excerpt.
        EntityBook eb = EntityBook.readObjectFromStream(br);
        // /////////////CorrespondentAuthorityMapper/////////////////////////////
        CorrespondentAuthorityMapper cmapper = null;
        cmapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
        archive.correspondentAuthorityMapper = cmapper;
        // ///////////////Label Mapper/////////////////////////////////////////////////////
        // Best effort: when the label manager cannot be read, the failure is logged and a new, empty one is used.
        // NOTE(review): labelManager is not attached to the archive in the visible lines -- confirm.
        LabelManager labelManager = null;
        try {
            labelManager = LabelManager.readObjectFromStream(labMapDirPath);
        } catch (Exception e) {
            Util.print_exception("Exception in reading label manager from archive, assigning a new label manager", e, log);
            labelManager = new LabelManager();
        // /////////////Annotation Manager///////////////////////////////////////////////////////
        // NOTE(review): annotationManager is not attached to the archive in the visible lines, and the
        // annotation file's existence is not checked above -- confirm how a missing file is handled.
        AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
        // this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive.
        // A failure to read the collection metadata is logged and does not abort the load.
        try {
            archive.collectionMetadata = readCollectionMetadata(baseDir);
        } catch (Exception e) {
            Util.print_exception("Error trying to read processing metadata file", e, log);
        // ///////////////////////////Done reading//////////////////////////////////////////////////////
        // most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
        archive.postDeserialized(baseDir, readOnly);
        result.put("emailDocs", archive.getAllDocs());
    return result;
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager) Archive(edu.stanford.muse.index.Archive) EntityBook( ParseException(org.apache.lucene.queryparser.classic.ParseException) LockObtainFailedException( CorruptIndexException(org.apache.lucene.index.CorruptIndexException) GZIPInputStream( AddressBook(edu.stanford.muse.AddressBookManager.AddressBook) CorrespondentAuthorityMapper(edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper) LabelManager(edu.stanford.muse.LabelManager.LabelManager)

Example 4 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SimpleSessions method readAnnotations.

// read annotation manager from a human readable file
/**
 * Reads an {@code AnnotationManager} from the annotation file located at
 * {@code archive.baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR + File.separatorChar + Archive.ANNOTATION_SUFFIX}.
 *
 * NOTE(review): this excerpt ends right after the read; the loaded manager is not used in the
 * visible lines. Presumably it is then set on {@code archive} -- confirm against the full source.
 *
 * @param archive archive whose {@code baseDir} locates the annotation file
 */
public static void readAnnotations(Archive archive) {
    // if there is an annotations.csv file present in basedir + session directory then read it and  set annotations on
    String dir = archive.baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
    String annotationcsv = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
    // NOTE(review): no existence check on the file is visible here, despite the comment above -- confirm how readObjectFromStream treats a missing file.
    AnnotationManager amanager = AnnotationManager.readObjectFromStream(annotationcsv);
Example 5 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class EmailRenderer method pagesForDocuments.

	 * returns pages and html for a collection of docs, which can be put into a
	 * jog frame. indexer clusters are used to
	 * Changed the first arg type from Collection<? extends EmailDocument> to Collection<Document>, as we get
	 * Collection<Document> in the browse page or from docsforquery; it's a hassle to make them all return EmailDocument
	 * especially when no other document type is used anywhere
/**
 * Builds the HTML skeleton (section / document / page divs) for {@code docs}, grouped by the
 * clusters that {@code result.getArchive().clustersForDocs(docs, coptions)} returns, and pairs it
 * with a {@code DataSet}.
 *
 * NOTE(review): several expressions in this excerpt are visibly truncated (the cluster-size check,
 * the inner loop's collection, the label join) and closing braces are missing. The comments below
 * describe only what is visible; confirm against the full source.
 *
 * @param docs         documents to render, passed to the archive for clustering
 * @param result       search result that supplies the archive, its annotation manager and label ids
 * @param datasetTitle title passed to the {@code DataSet} constructor
 * @param coptions     clustering type passed to {@code clustersForDocs}
 * @return pair of a {@code DataSet} built from (datasetDocs, result, datasetTitle) and the generated HTML
 * @throws Exception declared by the signature
 */
public static Pair<DataSet, String> pagesForDocuments(Collection<Document> docs, SearchResult result, String datasetTitle, MultiDoc.ClusteringType coptions) throws Exception {
    StringBuilder html = new StringBuilder();
    // Running page counter, written into each page div's pageId attribute.
    int pageNum = 0;
    // NOTE(review): pages is never used in the visible code -- confirm and remove if it is a leftover.
    List<String> pages = new ArrayList<>();
    // need clusters which map to sections in the browsing interface
    List<MultiDoc> clusters;
    // indexer may or may not have indexed all the docs in ds
    // if it has, use its clustering (could be yearly or monthly or category
    // wise
    // if (indexer != null && indexer.clustersIncludeAllDocs(ds))
    // if (indexer != null)
    // IMP: instead of searchResult.getDocsasSet() use the docs that is already ordered by
    // the sortBy order (in SearchResult.selectDocsAndBlobs method.
    clusters = result.getArchive().clustersForDocs(docs, coptions);
		 * else { // categorize by month if the docs have dates if
		 * (EmailUtils.allDocsAreDatedDocs(ds)) clusters =
		 * IndexUtils.partitionDocsByInterval(new ArrayList<DatedDocument>((Set)
		 * ds), true); else // must be category docs clusters =
		 * CategoryDocument.clustersDocsByCategoryName((Collection) ds); }
    // NOTE(review): datasetDocs is not populated in the visible lines; presumably each rendered
    // document is added inside the loop below -- confirm.
    List<Document> datasetDocs = new ArrayList<>();
    AnnotationManager annotationManager = result.getArchive().getAnnotationManager();
    // we build up a hierarchy of <section, document, page>
    for (MultiDoc md : clusters) {
        // NOTE(review): the left operand and the guarded statement are missing from this excerpt;
        // presumably empty clusters are skipped -- confirm.
        if ( == 0)
        String description = md.description;
        // escape a double
        // NOTE(review): a backslash does not escape a double quote inside an HTML attribute value, so a
        // description containing '"' can still terminate the name attribute. Util.escapeHTML, which is
        // already used for the comment below, looks like the appropriate call -- confirm.
        description = description.replace("\"", "\\\"");
        // quote if any
        // in the
        // description
        html.append("<div class=\"section\" name=\"" + description + "\">\n");
        List<List<String>> clusterResult = new ArrayList<>();
        // NOTE(review): the collection being iterated is missing from this excerpt; presumably the cluster's documents -- confirm.
        for (Document d : {
            // pdfAttrib stays empty in the visible code; the PDF link logic below is commented-out residue.
            String pdfAttrib = "";
				 * if (d instanceof PDFDocument) pdfAttrib = "pdfLink=\"" +
				 * ((PDFDocument) d).relativeURLForPDF + "\"";
            html.append("<div class=\"document\" " + pdfAttrib + ">\n");
            // clusterResult.add(docPageList);
            // for (String s: docPageList)
                // The annotation is HTML-escaped before it is embedded as the comment attribute.
                String comment = Util.escapeHTML(annotationManager.getAnnotation(d.getUniqueId()));
                html.append("<div class=\"page\"");
                if (!Util.nullOrEmpty(comment))
                    html.append(" comment=\"" + comment + "\"");
                // NOTE(review): messageID is only emitted when the document also has a non-empty comment;
                // that coupling looks unintended -- confirm.
                // NOTE(review): the message id, label ids and docId are written into attributes unescaped -- confirm they cannot contain quotes.
                if (!Util.nullOrEmpty(comment) && (d instanceof EmailDocument)) {
                    String messageId = d.getUniqueId();
                    html.append(" messageID=\"" + messageId + "\"");
                if (d.isLiked())
                    html.append(" liked=\"true\"");
                // also make sure that browse.jsp (the jsp calling this function) should have a map of LabelID to Label Name, Label type in javascript
                if (d instanceof EmailDocument) {
                    Set<String> labels = result.getArchive().getLabelIDs((EmailDocument) d);
                    if (!Util.nullOrEmpty(labels)) {
                        // NOTE(review): the right-hand side is truncated; presumably the label ids joined with "," -- confirm.
                        String val =","));
                        html.append(" labels=\"" + val + "\"");
                    } else
                        html.append(" labels=\"\"");
                // ////////////////////////////////////////DONE reading labels///////////////////////////////////////////////////////////////////////////
                // NOTE(review): the page div opened above is only terminated for EmailDocument; for any
                // other document type the visible code leaves the tag unclosed -- confirm.
                if (d instanceof EmailDocument)
                    html.append(" pageId='" + pageNum++ + "' " + " signature='" + Util.hash(((EmailDocument) d).getSignature()) + "' docId='" + d.getUniqueId() + "'></div>\n");
            // document
        // section
    DataSet dataset = new DataSet(datasetDocs, result, datasetTitle);
    return new Pair<>(dataset, html.toString());
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager) Pair(edu.stanford.muse.util.Pair)


