Search in sources :

Example 1 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class IndexUtils method partitionDocsByAnnotationPresence.

/**
 * Partitions documents by the presence/absence of annotation text.
 *
 * A document counts as annotated when the archive's annotation manager returns a
 * non-null, non-empty annotation for the document's unique id.
 *
 * @param docs    the documents to partition
 * @param archive the archive whose annotation manager is consulted
 * @return a map holding at most two facet items: key "notannotated" (inserted first) for
 *         documents without an annotation and key "annotated" for documents with one.
 *         A key is present only if at least one document falls into that group.
 */
private static Map<String, DetailedFacetItem> partitionDocsByAnnotationPresence(Collection<? extends Document> docs, Archive archive) {
    AnnotationManager annotationManager = archive.getAnnotationManager();
    // sets (not lists) so that a document occurring more than once in docs is only counted once
    Set<Document> annotatedDocs = new LinkedHashSet<>();
    Set<Document> unannotatedDocs = new LinkedHashSet<>();
    for (Document d : docs) {
        if (Util.nullOrEmpty(annotationManager.getAnnotation(d.getUniqueId())))
            unannotatedDocs.add(d);
        else
            annotatedDocs.add(d);
    }
    Map<String, DetailedFacetItem> result = new LinkedHashMap<>();
    if (!unannotatedDocs.isEmpty()) {
        DetailedFacetItem notAnnotated = new DetailedFacetItem("Not annotated", "Documents with no annotation", "isannotated", "false");
        for (Document doc : unannotatedDocs)
            notAnnotated.addDoc(doc);
        result.put("notannotated", notAnnotated);
    }
    if (!annotatedDocs.isEmpty()) {
        DetailedFacetItem annotated = new DetailedFacetItem("Annotated", "Documents with annotation", "isannotated", "true");
        for (Document doc : annotatedDocs)
            annotated.addDoc(doc);
        result.put("annotated", annotated);
    }
    return result;
}
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager)

Example 2 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SearchResult method filterForAnnotationText.

/* *************************ONLY DOCUMENT SPECIFIC FILTERS*************************************** */
/**
 * Keeps only those matched docs whose annotation text is one of the values requested
 * through the "annotation" query parameter.
 *
 * If the parameter is absent or empty, the input is returned untouched. Otherwise
 * inputSet.matchedDocs is replaced by a new map containing only the entries whose
 * document has a non-empty annotation that, once lowercased, is contained in the set
 * produced by Util.splitFieldForOr for the parameter value.
 *
 * @param inputSet The input search result object on which this filtering needs to be done.
 * @return The same SearchResult object, with matchedDocs narrowed down when a filter was given.
 */
private static SearchResult filterForAnnotationText(SearchResult inputSet) {
    String annotationStr = JSPHelper.getParam(inputSet.queryParams, "annotation");
    // nothing requested -> nothing to filter
    if (Util.nullOrEmpty(annotationStr))
        return inputSet;

    Set<String> wanted = Util.splitFieldForOr(annotationStr);
    AnnotationManager manager = inputSet.getArchive().getAnnotationManager();
    inputSet.matchedDocs = inputSet.matchedDocs.entrySet().stream().filter(candidate -> {
        String text = manager.getAnnotation(((EmailDocument) candidate.getKey()).getUniqueId());
        // docs without annotation never match; comparison is done on the lowercased annotation
        return !Util.nullOrEmpty(text) && wanted.contains(text.toLowerCase());
    }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    return inputSet;
}
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager)

Example 3 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SimpleSessions method loadSessionAsMap.

/**
 * Loads a session from the given filename and returns the map of loaded attributes.
 * If readOnly is false, caller MUST make sure to call packIndex.
 *
 * The session file is a gzipped Java object stream holding alternating (String key, Object value)
 * entries. Entries whose value cannot be deserialized (bad class version / unknown class) are
 * logged and skipped; any other failure while reading discards everything read so far.
 *
 * Change as on Nov 2017-
 * Earlier the whole archive was serialized and deserialized as one big entity. Now it is broken into
 * separately stored parts (address book, entity book, correspondent authority mapper, label manager,
 * annotation manager) plus the rest of the object. Therefore while reading, we need to read
 * all those separately from the appropriate files under the sessions sub-directory of baseDir.
 *
 * @param filename path of the serialized session file
 * @param baseDir  Indexer's baseDir (path before "indexes/")
 * @param readOnly passed on to Archive.postDeserialized
 * @return map of attribute name to value. If an archive was loaded, "emailDocs" is added as well.
 *         If the address book, entity book or correspondent authority mapper file is missing,
 *         the "archive" entry is set to null.
 * @throws IOException if one of the separately stored archive parts cannot be read
 */
public static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly) throws IOException {
    log.info("Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
    Map<String, Object> result = new LinkedHashMap<>();
    FileInputStream fis = null;
    ObjectInputStream ois = null;
    try {
        fis = new FileInputStream(filename);
        ois = new ObjectInputStream(new GZIPInputStream(fis));
        // keep reading till eof exception
        while (true) {
            String key = (String) ois.readObject();
            log.info("loading key: " + key);
            try {
                Object value = ois.readObject();
                if (value == null)
                    break;
                result.put(key, value);
            } catch (InvalidClassException ice) {
                log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
            } catch (ClassNotFoundException cnfe) {
                log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
            }
        }
    } catch (EOFException eof) {
        log.info("end of session file reached");
    } catch (Exception e) {
        log.warn("Warning unable to load session: " + Util.stackTrace(e));
        result.clear();
    } finally {
        // Closing the object stream closes the wrapped streams too. If wrapping failed part-way
        // (e.g. the file is not gzipped), only the raw file stream exists and must be closed itself,
        // otherwise its file descriptor leaks.
        try {
            if (ois != null)
                ois.close();
            else if (fis != null)
                fis.close();
        } catch (Exception e) {
            Util.print_exception(e, log);
        }
    }
    // need to set up sentiments explicitly -- now no need since lexicon is part of the session
    log.info("Memory status: " + Util.getMemoryStats());
    Archive archive = (Archive) result.get("archive");
    if (archive != null) {
        // Read the other modules of the Archive object, which are stored in their own files
        // instead of being serialized with the archive.
        String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
        String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
        String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
        // If any of the address book, entity book or correspondent authority mapper files is not present,
        // report "no archive": start afresh with importing the email-archive again in processing mode.
        if (!(new File(addressBookPath).exists()) || !(new File(entityBookPath).exists()) || !(new File(cAuthorityPath).exists())) {
            result.put("archive", null);
            return result;
        }
        // ///////////////AddressBook////////////////////////////////////////////
        // try-with-resources so the reader is closed even when parsing throws
        try (BufferedReader br = new BufferedReader(new FileReader(addressBookPath))) {
            archive.addressBook = AddressBook.readObjectFromStream(br);
        }
        // //////////////EntityBook/////////////////////////////////////
        try (BufferedReader br = new BufferedReader(new FileReader(entityBookPath))) {
            archive.setEntityBook(EntityBook.readObjectFromStream(br));
        }
        // /////////////CorrespondentAuthorityMapper/////////////////////////////
        archive.correspondentAuthorityMapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
        // ///////////////Label Mapper/////////////////////////////////////////////////////
        LabelManager labelManager;
        try {
            labelManager = LabelManager.readObjectFromStream(labMapDirPath);
        } catch (Exception e) {
            // best effort: an unreadable label manager must not prevent the archive from loading
            Util.print_exception("Exception in reading label manager from archive, assigning a new label manager", e, log);
            labelManager = new LabelManager();
        }
        archive.setLabelManager(labelManager);
        // /////////////Annotation Manager///////////////////////////////////////////////////////
        AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
        archive.setAnnotationManager(annotationManager);
        // this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive.
        try {
            archive.collectionMetadata = readCollectionMetadata(baseDir);
        } catch (Exception e) {
            Util.print_exception("Error trying to read processing metadata file", e, log);
        }
        // ///////////////////////////Done reading//////////////////////////////////////////////////////
        // most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
        archive.postDeserialized(baseDir, readOnly);
        result.put("emailDocs", archive.getAllDocs());
    }
    return result;
}
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager) Archive(edu.stanford.muse.index.Archive) EntityBook(edu.stanford.muse.ie.variants.EntityBook) ParseException(org.apache.lucene.queryparser.classic.ParseException) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) GZIPInputStream(java.util.zip.GZIPInputStream) AddressBook(edu.stanford.muse.AddressBookManager.AddressBook) CorrespondentAuthorityMapper(edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper) LabelManager(edu.stanford.muse.LabelManager.LabelManager)

Example 4 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class SimpleSessions method readAnnotations.

/**
 * Reads the annotation manager from its file in the archive's session directory
 * (archive.baseDir / Archive.SESSIONS_SUBDIR / Archive.ANNOTATION_SUFFIX) and installs
 * it on the given archive, replacing whatever annotation manager was set before.
 *
 * @param archive the archive whose annotations are (re)loaded
 */
public static void readAnnotations(Archive archive) {
    String annotationsPath = archive.baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR + File.separatorChar + Archive.ANNOTATION_SUFFIX;
    archive.setAnnotationManager(AnnotationManager.readObjectFromStream(annotationsPath));
}
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager)

Example 5 with AnnotationManager

use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.

the class EmailRenderer method pagesForDocuments.

/**
 * Returns the dataset and the html skeleton for a collection of docs, which can be put into a
 * jog frame. The docs are grouped into clusters by the archive; the html is a hierarchy of
 * section (one per non-empty cluster) / document / page divs, with per-document
 * state (comment, liked, labels, ids) carried as attributes on the page div.
 *
 * Changed the first arg type from: Collection<? extends EmailDocument> to Collection<Document>, as we get
 * Collection<Document> in browse page or from docsforquery, its a hassle to make them all return EmailDocument
 * especially when no other document type is used anywhere.
 *
 * IMP: docs (not searchResult.getDocsasSet()) is used for clustering because it is already ordered by
 * the sortBy order (in SearchResult.selectDocsAndBlobs method).
 *
 * @param docs         the documents to render, in display order
 * @param result       the search result these docs belong to; also provides the archive
 * @param datasetTitle title given to the returned DataSet
 * @param coptions     clustering type handed to the archive's clustersForDocs
 * @return pair of (dataset over all docs of non-empty clusters, generated html)
 * @throws Exception declared by the original signature; kept for callers
 */
public static Pair<DataSet, String> pagesForDocuments(Collection<Document> docs, SearchResult result, String datasetTitle, MultiDoc.ClusteringType coptions) throws Exception {
    StringBuilder html = new StringBuilder();
    int pageNum = 0;
    // need clusters which map to sections in the browsing interface
    List<MultiDoc> clusters = result.getArchive().clustersForDocs(docs, coptions);
    List<Document> datasetDocs = new ArrayList<>();
    AnnotationManager annotationManager = result.getArchive().getAnnotationManager();
    // we build up a hierarchy of <section, document, page>
    for (MultiDoc md : clusters) {
        if (md.docs.size() == 0)
            continue;
        // escape a double quote if any in the description
        // NOTE(review): a backslash is not an escape inside an HTML attribute value; kept as is
        // because the consumer of the "name" attribute is not visible here -- confirm before changing.
        String description = md.description.replace("\"", "\\\"");
        html.append("<div class=\"section\" name=\"" + description + "\">\n");
        for (Document d : md.docs) {
            html.append("<div class=\"document\" >\n");
            datasetDocs.add(d);

            String comment = Util.escapeHTML(annotationManager.getAnnotation(d.getUniqueId()));
            boolean hasComment = !Util.nullOrEmpty(comment);
            boolean isEmail = d instanceof EmailDocument;
            html.append("<div class=\"page\"");
            if (hasComment)
                html.append(" comment=\"" + comment + "\"");
            if (hasComment && isEmail) {
                String messageId = d.getUniqueId();
                html.append(" messageID=\"" + messageId + "\"");
            }
            if (d.isLiked())
                html.append(" liked=\"true\"");
            if (isEmail) {
                // also make sure that browse.jsp (the jsp calling this function) should have a map of LabelID to Label Name, Label type in javascript
                Set<String> labels = result.getArchive().getLabelIDs((EmailDocument) d);
                if (!Util.nullOrEmpty(labels))
                    html.append(" labels=\"" + String.join(",", labels) + "\"");
                else
                    html.append(" labels=\"\"");
                html.append(" pageId='" + pageNum++ + "' " + " signature='" + Util.hash(((EmailDocument) d).getSignature()) + "' docId='" + d.getUniqueId() + "'></div>\n");
            } else {
                // the page tag must be terminated for every document type, otherwise the markup is malformed
                html.append("></div>\n");
            }
            // document
            html.append("</div>");
        }
        // section
        html.append("</div>\n");
    }
    DataSet dataset = new DataSet(datasetDocs, result, datasetTitle);
    return new Pair<>(dataset, html.toString());
}
Also used : AnnotationManager(edu.stanford.muse.AnnotationManager.AnnotationManager) Pair(edu.stanford.muse.util.Pair)

Aggregations

AnnotationManager (edu.stanford.muse.AnnotationManager.AnnotationManager)7 Pair (edu.stanford.muse.util.Pair)3 AddressBook (edu.stanford.muse.AddressBookManager.AddressBook)1 CorrespondentAuthorityMapper (edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper)1 LabelManager (edu.stanford.muse.LabelManager.LabelManager)1 EntityBook (edu.stanford.muse.ie.variants.EntityBook)1 Archive (edu.stanford.muse.index.Archive)1 GZIPInputStream (java.util.zip.GZIPInputStream)1 CorruptIndexException (org.apache.lucene.index.CorruptIndexException)1 ParseException (org.apache.lucene.queryparser.classic.ParseException)1 LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException)1