Usage of edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper in project ePADD:
class SimpleSessions, method loadSessionAsMap.
/**
 * Loads a session from the given filename, and returns the map of loaded
 * attributes.
 * If readOnly is false, the caller MUST make sure to call packIndex.
 * baseDir is Indexer's baseDir (path before "indexes/").
 *
 * @param filename path of the serialized (gzipped) session file
 * @param baseDir  archive base directory; the sessions subdirectory under it
 *                 holds the separately-serialized components
 * @param readOnly whether the archive is being opened read-only
 * @return map of session attributes; the "archive" entry is set to null (and
 *         the map returned early) if the separately-stored component files are
 *         missing, signalling that the archive must be re-imported
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 * Change as on Nov 2017-
 * Earlier the whole archive was serialized and deserialized as one big entity. Now it is broken into
 * four main parts, Addressbook, entitybook, correspondentAuthorityMapper and the rest of the object
 * We save all these four components separately in saveArchive. Therefore while reading, we need to read
 * all those separately from appropriate files.
 */
public static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly) throws IOException {
    log.info("Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
    // keep reading key/value pairs till EOF (or a null value sentinel)
    Map<String, Object> result = new LinkedHashMap<>();
    // try-with-resources guarantees the stream (and the underlying FileInputStream)
    // is closed even if GZIP header parsing or deserialization throws
    try (ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(filename)))) {
        while (true) {
            String key = (String) ois.readObject();
            log.info("loading key: " + key);
            try {
                Object value = ois.readObject();
                if (value == null)
                    break;
                result.put(key, value);
            } catch (InvalidClassException ice) {
                log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
            } catch (ClassNotFoundException cnfe) {
                log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
            }
        }
    } catch (EOFException eof) {
        // normal termination: the writer does not emit an explicit end marker
        log.info("end of session file reached");
    } catch (Exception e) {
        // any other failure invalidates the whole session: discard partial results
        log.warn("Warning unable to load session: " + Util.stackTrace(e));
        result.clear();
    }
    // need to set up sentiments explicitly -- now no need since lexicon is part of the session
    log.info("Memory status: " + Util.getMemoryStats());
    Archive archive = (Archive) result.get("archive");
    // no groups in public mode
    if (archive != null) {
        /*
        Read other three modules of Archive object which were set as transient and hence did not serialize.
        */
        // file path names of addressbook, entitybook and correspondentAuthorityMapper data.
        String dir = baseDir + File.separatorChar + Archive.SESSIONS_SUBDIR;
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
        String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
        String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
        // if any of the three required files is missing, start afresh by importing the email-archive again in processing mode.
        if (!(new File(addressBookPath).exists()) || !(new File(entityBookPath).exists()) || !(new File(cAuthorityPath).exists())) {
            result.put("archive", null);
            return result;
        }
        // ///////////////AddressBook////////////////////////////////////////////
        // try-with-resources: readers were previously leaked if readObjectFromStream threw
        try (BufferedReader br = new BufferedReader(new FileReader(addressBookPath))) {
            archive.addressBook = AddressBook.readObjectFromStream(br);
        }
        // //////////////EntityBook/////////////////////////////////////
        try (BufferedReader br = new BufferedReader(new FileReader(entityBookPath))) {
            archive.setEntityBook(EntityBook.readObjectFromStream(br));
        }
        // /////////////CorrespondentAuthorityMapper/////////////////////////////
        archive.correspondentAuthorityMapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
        // ///////////////Label Mapper/////////////////////////////////////////////////////
        LabelManager labelManager;
        try {
            labelManager = LabelManager.readObjectFromStream(labMapDirPath);
        } catch (Exception e) {
            // a damaged/absent label map is recoverable: fall back to a fresh one
            Util.print_exception("Exception in reading label manager from archive, assigning a new label manager", e, log);
            labelManager = new LabelManager();
        }
        archive.setLabelManager(labelManager);
        // /////////////Annotation Manager///////////////////////////////////////////////////////
        AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
        archive.setAnnotationManager(annotationManager);
        // this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive.
        try {
            archive.collectionMetadata = readCollectionMetadata(baseDir);
        } catch (Exception e) {
            Util.print_exception("Error trying to read processing metadata file", e, log);
        }
        // ///////////////////////////Done reading//////////////////////////////////////////////////////
        // most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
        archive.postDeserialized(baseDir, readOnly);
        result.put("emailDocs", archive.getAllDocs());
    }
    return result;
}
Usage of edu.stanford.muse.AddressBookManager.CorrespondentAuthorityMapper in project ePADD:
class ArchiveReaderWriter, method loadSessionAsMap.
// #############################################End: Weak reference cache for the archive object and archive#####################################
// #############################################Start: Reading/loading an archive bag###########################################################
/**
 * Loads a session from the given filename, and returns the map of loaded
 * attributes.
 * If readOnly is false, the caller MUST make sure to call packIndex.
 * baseDir is Indexer's baseDir (path before "indexes/").
 *
 * @param filename path of the serialized (gzipped) session file
 * @param baseDir  archive bag base directory; component files live under its
 *                 data/sessions subdirectory
 * @param readOnly whether the archive is being opened read-only
 * @param mode     ePADD mode; in DISCOVERY mode the correspondent-authority,
 *                 label and annotation modules are skipped
 * @return map of session attributes; the "archive" entry is set to null (and
 *         the map returned early) if required component files are missing
 * @throws IOException
 * @throws LockObtainFailedException
 * @throws CorruptIndexException
 * Change as on Nov 2017-
 * Earlier the whole archive was serialized and deserialized as one big entity. Now it is broken into
 * four main parts, Addressbook, entitybook, correspondentAuthorityMapper and the rest of the object
 * We save all these four components separately in saveArchive. Therefore while reading, we need to read
 * all those separately from appropriate files.
 */
private static Map<String, Object> loadSessionAsMap(String filename, String baseDir, boolean readOnly, ModeConfig.Mode mode) throws IOException {
    log.info("Loading session from file " + filename + " size: " + Util.commatize(new File(filename).length() / 1024) + " KB");
    long startTime = System.currentTimeMillis();
    // keep reading key/value pairs till EOF (or a null value sentinel)
    Map<String, Object> result = new LinkedHashMap<>();
    // try-with-resources guarantees the stream (and the underlying FileInputStream)
    // is closed even if GZIP header parsing or deserialization throws
    try (ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(new GZIPInputStream(new FileInputStream(filename))))) {
        while (true) {
            String key = (String) ois.readObject();
            log.info("loading key: " + key);
            try {
                Object value = ois.readObject();
                if (value == null)
                    break;
                result.put(key, value);
            } catch (InvalidClassException ice) {
                log.error("Bad version for value of key " + key + ": " + ice + "\nContinuing but this key is not set...");
            } catch (ClassNotFoundException cnfe) {
                log.error("Class not found for value of key " + key + ": " + cnfe + "\nContinuing but this key is not set...");
            }
        }
    } catch (EOFException eof) {
        // normal termination: the writer does not emit an explicit end marker
        log.info("end of session file reached");
    } catch (Exception e) {
        // any other failure invalidates the whole session: discard partial results
        log.warn("Warning unable to load session: " + Util.stackTrace(e));
        result.clear();
    }
    log.info("Session loaded successfully");
    // need to set up sentiments explicitly -- now no need since lexicon is part of the session
    log.info("Memory status: " + Util.getMemoryStats());
    Archive archive = (Archive) result.get("archive");
    // no groups in public mode
    if (archive != null) {
        long deserializationTime = System.currentTimeMillis();
        log.info("Time taken to read and deserialize archive object: " + (deserializationTime - startTime) + " milliseconds");
        /*
        Read other three modules of Archive object which were set as transient and hence did not serialize.
        */
        // file path names of addressbook, entitybook and correspondentAuthorityMapper data.
        String dir = baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR;
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOKMANAGER_SUFFIX;
        String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        String labMapDirPath = dir + File.separatorChar + Archive.LABELMAPDIR;
        String annotationMapPath = dir + File.separatorChar + Archive.ANNOTATION_SUFFIX;
        String blobNormalizationMapPath = dir + File.separatorChar + Archive.BLOBLNORMALIZATIONFILE_SUFFIX;
        // if a required file (addressbook, correspondent-authority; entitybook check currently disabled)
        // is missing, start afresh by importing the email-archive again in processing mode.
        if (!(new File(addressBookPath).exists()) || /*|| !(new File(entityBookPath).exists())*/
                !(new File(cAuthorityPath).exists())) {
            result.put("archive", null);
            return result;
        }
        log.info("Setting up post-deserialization action");
        archive.postDeserialized(baseDir, readOnly);
        long postDeserializationDuration = System.currentTimeMillis();
        log.info("Post-deserialization action completed in " + (postDeserializationDuration - deserializationTime) + " milliseconds");
        // /////////////Processing metadata////////////////////////////////////////////////
        // Read collection metadata first because some of the collection's information might be used while loading other modules. Like first date and last date of an archive
        // is used when doc's dates are found corrupted.
        // override the PM inside the archive with the one in the PM file
        // update: since v5 no pm will be inside the archive.
        // this is useful when we import a legacy archive into processing, where we've updated the pm file directly, without updating the archive.
        log.info("Loading collection metadata");
        try {
            archive.collectionMetadata = readCollectionMetadata(baseDir);
        } catch (Exception e) {
            Util.print_exception("Error trying to read processing metadata file", e, log);
        }
        long collectionMetadataDuration = System.currentTimeMillis();
        if (archive.collectionMetadata != null) {
            log.info("Collection metadata loaded successfully in " + (collectionMetadataDuration - postDeserializationDuration) + " milliseconds");
        }
        // ///////////////AddressBook////////////////////////////////////////////
        log.info("Loading address book");
        archive.addressBook = readAddressBook(addressBookPath, archive.getAllDocs());
        long addressBookLoading = System.currentTimeMillis();
        log.info("Addressbook loaded successfully in " + (addressBookLoading - collectionMetadataDuration) + " milliseconds");
        // //////////////EntityBook/////////////////////////////////////
        log.info("Loading EntityBook Manager");
        EntityBookManager eb = readEntityBookManager(archive, entityBookPath);
        long entityBookLoading = System.currentTimeMillis();
        archive.setEntityBookManager(eb);
        log.info("EntityBook Manager loaded successfully in " + (entityBookLoading - addressBookLoading) + " milliseconds");
        // /////////////CorrespondentAuthorityMapper/////////////////////////////
        // each stage carries forward the previous timestamp when skipped (DISCOVERY mode)
        // so that downstream duration arithmetic stays correct
        long correspondentAuthorityLoading, labelManagerLoading, annotationManagerLoading, blobLoading;
        if (mode != ModeConfig.Mode.DISCOVERY) {
            log.info("Loading Correspondent authority mapper");
            CorrespondentAuthorityMapper cmapper = CorrespondentAuthorityMapper.readObjectFromStream(cAuthorityPath);
            correspondentAuthorityLoading = System.currentTimeMillis();
            log.info("Correspondent authority mapper loaded successfully in " + (correspondentAuthorityLoading - entityBookLoading) + " milliseconds");
            archive.correspondentAuthorityMapper = cmapper;
        } else {
            correspondentAuthorityLoading = entityBookLoading;
        }
        // ///////////////Label Mapper/////////////////////////////////////////////////////
        if (mode != ModeConfig.Mode.DISCOVERY) {
            log.info("Loading Label Manager");
            LabelManager labelManager = readLabelManager(ArchiveReaderWriter.getArchiveIDForArchive(archive), labMapDirPath);
            archive.setLabelManager(labelManager);
            labelManagerLoading = System.currentTimeMillis();
            log.info("Label Manager loaded successfully in " + (labelManagerLoading - correspondentAuthorityLoading) + " milliseconds");
        } else {
            labelManagerLoading = correspondentAuthorityLoading;
        }
        // /////////////Annotation Manager///////////////////////////////////////////////////////
        if (mode != ModeConfig.Mode.DISCOVERY) {
            log.info("Loading Annotation Manager");
            AnnotationManager annotationManager = AnnotationManager.readObjectFromStream(annotationMapPath);
            archive.setAnnotationManager(annotationManager);
            annotationManagerLoading = System.currentTimeMillis();
            // fix: message previously omitted the " milliseconds" unit present in all sibling timing logs
            log.info("Annotation Manager loaded successfully in " + (annotationManagerLoading - labelManagerLoading) + " milliseconds");
        } else {
            annotationManagerLoading = labelManagerLoading;
        }
        // ///////////////////Blob Normalization map (IF exists)//////////////////////////////////////////////////////
        if (new File(blobNormalizationMapPath).exists()) {
            log.info("Computing blob normalization map (An artifact of AMatica tool)");
            archive.getBlobStore().setNormalizationMap(blobNormalizationMapPath);
            blobLoading = System.currentTimeMillis();
            log.info("Blob normalization map computed successfully in " + (blobLoading - annotationManagerLoading) + " milliseconds");
        } else {
            blobLoading = annotationManagerLoading;
        }
        // ///////////////////////////Done reading//////////////////////////////////////////////////////
        // most of this code should probably move inside Archive, maybe a function called "postDeserialized()"
        result.put("emailDocs", archive.getAllDocs());
        log.info("Assigning thread IDs");
        archive.assignThreadIds();
        log.info("Thread IDs assigned successfully");
        log.info("Total time spent in archive loading is " + (System.currentTimeMillis() - startTime) + " milliseconds");
    }
    return result;
}
End of aggregated usage examples.