Use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.
The class IndexUtils, method partitionDocsByAnnotationPresence.
/**
* Partition documents by the presence/absence of annotation text
*/
private static Map<String, DetailedFacetItem> partitionDocsByAnnotationPresence(Collection<? extends Document> docs, Archive archive) {
    Map<String, Set<Document>> tagToDocs = new LinkedHashMap<>();
    Map<String, DetailedFacetItem> result = new LinkedHashMap<>();
    Set<Document> annotatedDocs = new LinkedHashSet<>();
    Set<Document> unannotatedDocs = new LinkedHashSet<>();
    AnnotationManager annotationManager = archive.getAnnotationManager();
    for (Document d : docs) {
        if (!Util.nullOrEmpty(annotationManager.getAnnotation(d.getUniqueId())))
            annotatedDocs.add(d);
        else
            unannotatedDocs.add(d);
    }
    if (unannotatedDocs.size() > 0) {
        result.put("notannotated", new DetailedFacetItem("Not annotated", "Documents with no annotation", "isannotated", "false"));
        unannotatedDocs.forEach(doc -> result.get("notannotated").addDoc(doc));
    }
    if (annotatedDocs.size() > 0) {
        result.put("annotated", new DetailedFacetItem("Annotated", "Documents with annotation", "isannotated", "true"));
        annotatedDocs.forEach(doc -> result.get("annotated").addDoc(doc));
    }
    return result;
}
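A minimal sketch of a hypothetical caller, assuming an already-loaded archive. partitionDocsByAnnotationPresence is private, so real callers reach it through IndexUtils' public faceting code; the variable names here are illustrative only.
Map<String, DetailedFacetItem> facets = partitionDocsByAnnotationPresence(archive.getAllDocs(), archive);
// at most two buckets are present ("annotated" and "notannotated"); empty buckets are omitted entirely
for (Map.Entry<String, DetailedFacetItem> e : facets.entrySet())
    System.out.println("facet bucket: " + e.getKey());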
Use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.
The class SearchResult, method filterForAnnotationText.
/* *************************ONLY DOCUMENT SPECIFIC FILTERS*************************************** */
/**
 * returns only the docs, from amongst the given ones, that match the query specification for annotation text.
 *
 * @param inputSet The input search result object on which this filtering needs to be done.
 * @return Another SearchResult object containing only the filtered messages.
 */
private static SearchResult filterForAnnotationText(SearchResult inputSet) {
    String annotationStr = JSPHelper.getParam(inputSet.queryParams, "annotation");
    if (!Util.nullOrEmpty(annotationStr)) {
        Set<String> annotations = Util.splitFieldForOr(annotationStr);
        AnnotationManager annotationManager = inputSet.getArchive().getAnnotationManager();
        inputSet.matchedDocs = inputSet.matchedDocs.entrySet().stream().filter(entry -> {
            EmailDocument edoc = (EmailDocument) entry.getKey();
            String comment = annotationManager.getAnnotation(edoc.getUniqueId());
            if (!Util.nullOrEmpty(comment)) {
                comment = comment.toLowerCase();
                return annotations.contains(comment);
            } else
                return false;
        }).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    }
    return inputSet;
}
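The filter above does an exact match: the entire lowercased annotation must equal one of the OR-separated query terms, not merely contain it. A minimal sketch of that check in isolation (hypothetical helper; annotationManager and annotations are the same objects built above):
static boolean hasMatchingAnnotation(EmailDocument ed, AnnotationManager annotationManager, Set<String> annotations) {
    String note = annotationManager.getAnnotation(ed.getUniqueId());
    // exact, lowercased equality against the query terms, not a substring match
    return !Util.nullOrEmpty(note) && annotations.contains(note.toLowerCase());
}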
Use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.
The class SimpleSessions, method loadSessionAsMap.
/**
 * loads session from the given filename, and returns the map of loaded
 * attributes.
 * if readOnly is false, caller MUST make sure to call packIndex.
 * baseDir is Indexer's baseDir (path before "indexes/")
 *
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 * Change as of Nov 2017:
 * Earlier the whole archive was serialized and deserialized as one big entity. Now it is broken into
 * four main parts: the addressbook, the entitybook, the correspondentAuthorityMapper and the rest of the object.
 * We save all four components separately in saveArchive; therefore, while reading, we need to read
 * them separately from the appropriate files.
 */
public static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly) throws IOException {
    log.info("Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
    ObjectInputStream ois = null;
    // keep reading till eof exception
    Map<String, Object> result = new LinkedHashMap<>();
    try {
        ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(filename)));
        while (true) {
            String key = (String) ois.readObject();
            log.info("loading key: " + key);
            try {
                Object value = ois.readObject();
                if (value == null)
                    break;
                result.put(key, value);
            } catch (InvalidClassException ice) {
                log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
            } catch (ClassNotFoundException cnfe) {
                log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
            }
        }
    } catch (EOFException eof) {
        log.info("end of session file reached");
    } catch (Exception e) {
        log.warn("Warning unable to load session: " + Util.stackTrace(e));
        result.clear();
    }
    if (ois != null)
        try {
            ois.close();
        } catch (Exception e) {
            Util.print_exception(e, log);
        }
    // need to set up sentiments explicitly -- now no need since lexicon is part of the session
    log.info("Memory status: " + Util.getMemoryStats());
    Archive archive = (Archive) result.get("archive");
    // no groups in public mode
    if (archive != null) {
        /*
         * Read the other three modules of the Archive object, which were marked transient and hence were not serialized.
         */
        // file path names of addressbook, entitybook and correspondentAuthorityMapper data
        String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
        String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
        String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
        // if the addressbook, entitybook or correspondentAuthorityMapper file is not present, start afresh by importing the email archive again in processing mode
        if (!(new File(addressBookPath).exists()) || !(new File(entityBookPath).exists()) || !(new File(cAuthorityPath).exists())) {
            result.put("archive", null);
            return result;
        }
        // ///////////////AddressBook////////////////////////////////////////////
        BufferedReader br = new BufferedReader(new FileReader(addressBookPath));
        AddressBook ab = AddressBook.readObjectFromStream(br);
        archive.addressBook = ab;
        br.close();
        // //////////////EntityBook/////////////////////////////////////
        br = new BufferedReader(new FileReader(entityBookPath));
        EntityBook eb = EntityBook.readObjectFromStream(br);
        archive.setEntityBook(eb);
        br.close();
        // /////////////CorrespondentAuthorityMapper/////////////////////////////
        CorrespondentAuthorityMapper cmapper = null;
        cmapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
        archive.correspondentAuthorityMapper = cmapper;
        // ///////////////Label Mapper/////////////////////////////////////////////////////
        LabelManager labelManager = null;
        try {
            labelManager = LabelManager.readObjectFromStream(labMapDirPath);
        } catch (Exception e) {
            Util.print_exception("Exception in reading label manager from archive, assigning a new label manager", e, log);
            labelManager = new LabelManager();
        }
        archive.setLabelManager(labelManager);
        // /////////////Annotation Manager///////////////////////////////////////////////////////
        AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
        archive.setAnnotationManager(annotationManager);
        // this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive
        try {
            archive.collectionMetadata = readCollectionMetadata(baseDir);
        } catch (Exception e) {
            Util.print_exception("Error trying to read processing metadata file", e, log);
        }
        // ///////////////////////////Done reading//////////////////////////////////////////////////////
        // most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
        archive.postDeserialized(baseDir, readOnly);
        result.put("emailDocs", archive.getAllDocs());
    }
    return result;
}
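A minimal sketch of a hypothetical caller, showing the two keys this method populates; the project's real callers wrap this differently.
Map<String, Object> session = SimpleSessions.loadSessionAsMap(filename, baseDir, true /* readOnly */);
Archive archive = (Archive) session.get("archive");
if (archive == null) {
    // the addressbook, entitybook or authority-mapper file was missing;
    // the archive must be re-imported in processing mode
} else {
    Collection<? extends Document> emailDocs = (Collection<? extends Document>) session.get("emailDocs");
}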
Use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.
The class SimpleSessions, method readAnnotations.
// read annotation manager from a human readable file
public static void readAnnotations(Archive archive) {
    // if there is an annotations.csv file present in basedir + session directory, read it and set the annotations on the archive
    String dir = archive.baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
    String annotationcsv = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
    AnnotationManager amanager = AnnotationManager.readObjectFromStream(annotationcsv);
    archive.setAnnotationManager(amanager);
}
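Once readAnnotations has run, per-message notes are available through the archive's AnnotationManager. A minimal sketch, where docId stands in for any document's unique id:
SimpleSessions.readAnnotations(archive);
String note = archive.getAnnotationManager().getAnnotation(docId);
if (!Util.nullOrEmpty(note))
    System.out.println("annotation: " + note);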
Use of edu.stanford.muse.AnnotationManager.AnnotationManager in project epadd by ePADD.
The class EmailRenderer, method pagesForDocuments.
/*
 * returns pages and html for a collection of docs, which can be put into a
 * jog frame. indexer clusters are used to group the docs into sections.
 *
 * Changed the first arg type from Collection<? extends EmailDocument> to Collection<Document>, as we get
 * Collection<Document> in the browse page or from docsforquery; it's a hassle to make them all return EmailDocument,
 * especially when no other document type is used anywhere.
 */
public static Pair<DataSet, String> pagesForDocuments(Collection<Document> docs, SearchResult result, String datasetTitle, MultiDoc.ClusteringType coptions) throws Exception {
    StringBuilder html = new StringBuilder();
    int pageNum = 0;
    List<String> pages = new ArrayList<>();
    // need clusters which map to sections in the browsing interface
    List<MultiDoc> clusters;
    // indexer may or may not have indexed all the docs in ds
    // if it has, use its clustering (could be yearly or monthly or category wise)
    // if (indexer != null && indexer.clustersIncludeAllDocs(ds))
    // if (indexer != null)
    // IMP: instead of searchResult.getDocsasSet(), use the docs that are already ordered by
    // the sortBy order (in the SearchResult.selectDocsAndBlobs method)
    clusters = result.getArchive().clustersForDocs(docs, coptions);
    /*
     * else { // categorize by month if the docs have dates if
     * (EmailUtils.allDocsAreDatedDocs(ds)) clusters =
     * IndexUtils.partitionDocsByInterval(new ArrayList<DatedDocument>((Set)
     * ds), true); else // must be category docs clusters =
     * CategoryDocument.clustersDocsByCategoryName((Collection) ds); }
     */
    List<Document> datasetDocs = new ArrayList<>();
    AnnotationManager annotationManager = result.getArchive().getAnnotationManager();
    // we build up a hierarchy of <section, document, page>
    for (MultiDoc md : clusters) {
        if (md.docs.size() == 0)
            continue;
        String description = md.description;
        // escape a double quote, if any, in the description
        description = description.replace("\"", "\\\"");
        html.append("<div class=\"section\" name=\"" + description + "\">\n");
        List<List<String>> clusterResult = new ArrayList<>();
        for (Document d : md.docs) {
            String pdfAttrib = "";
            /*
             * if (d instanceof PDFDocument) pdfAttrib = "pdfLink=\"" +
             * ((PDFDocument) d).relativeURLForPDF + "\"";
             */
            html.append("<div class=\"document\" " + pdfAttrib + ">\n");
            datasetDocs.add(d);
            pages.add(null);
            clusterResult.add(null);
            // clusterResult.add(docPageList);
            // for (String s: docPageList)
            {
                String comment = Util.escapeHTML(annotationManager.getAnnotation(d.getUniqueId()));
                html.append("<div class=\"page\"");
                if (!Util.nullOrEmpty(comment))
                    html.append(" comment=\"" + comment + "\"");
                if (!Util.nullOrEmpty(comment) && (d instanceof EmailDocument)) {
                    String messageId = d.getUniqueId();
                    html.append(" messageID=\"" + messageId + "\"");
                }
                if (d.isLiked())
                    html.append(" liked=\"true\"");
                // also make sure that browse.jsp (the jsp calling this function) has a map of label ID to label name and label type in javascript
                if (d instanceof EmailDocument) {
                    Set<String> labels = result.getArchive().getLabelIDs((EmailDocument) d);
                    if (!Util.nullOrEmpty(labels)) {
                        String val = labels.stream().collect(Collectors.joining(","));
                        html.append(" labels=\"" + val + "\"");
                    } else
                        html.append(" labels=\"\"");
                }
                // ////////////////////////////////////////DONE reading labels///////////////////////////////////////////////////////////////////////////
                if (d instanceof EmailDocument)
                    html.append(" pageId='" + pageNum++ + "' " + " signature='" + Util.hash(((EmailDocument) d).getSignature()) + "' docId='" + d.getUniqueId() + "'></div>\n");
            }
            // document
            html.append("</div>");
        }
        // section
        html.append("</div>\n");
    }
    DataSet dataset = new DataSet(datasetDocs, result, datasetTitle);
    return new Pair<>(dataset, html.toString());
}
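A sketch of how the returned pair might be consumed by a browse-style page. This is a hypothetical helper: the dataset title, the clustering option and the Pair accessors (getFirst/getSecond) are assumptions to verify against the actual MultiDoc and Pair classes.
static String renderBrowseHtml(Collection<Document> docs, SearchResult result, MultiDoc.ClusteringType coptions) throws Exception {
    Pair<DataSet, String> p = EmailRenderer.pagesForDocuments(docs, result, "browse", coptions);
    DataSet dataSet = p.getFirst(); // per-document metadata backing the jog frame (assumed accessor)
    String html = p.getSecond();    // nested section/document/page divs (assumed accessor)
    return html;
}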