Use of edu.stanford.muse.index.Archive in the ePADD project: class Photos, method serveImage.
/**
 * Serves up an image file to the HTTP response.
 * <p>
 * Security: the file param must always be an offset from a known and controlled dir and
 * must never contain ".." (file/path traversal). This method is a slightly different
 * version of a similar function in Muse's JSPHelper; it is required to support providing
 * images without loading an archive.
 * <p>
 * By default it loads the file relative to the current archive's images dir; if no
 * archive is in the session, the base dir is chosen by mode
 * (processing/discovery/delivery/appraisal).
 * Note: the file param always uses forward slashes as the path separator, regardless of platform.
 *
 * @param request  must carry a "file" parameter (relative path, '/'-separated)
 * @param response receives the file bytes, or SC_FORBIDDEN on a traversal attempt
 * @throws IOException if writing the response fails
 */
public static void serveImage(HttpServletRequest request, HttpServletResponse response) throws IOException {
    String filename = request.getParameter("file");
    if (Util.nullOrEmpty(filename)) {
        log.warn("Empty filename sent to serveImage");
        return;
    }
    filename = JSPHelper.convertRequestParamToUTF8(filename);

    // Reject ".." anywhere in the path, regardless of separator style.
    // (The previous check looked for ".." + File.separator BEFORE the '/' -> separator
    // conversion below, so on Windows a request containing "../" slipped through, and a
    // trailing ".." was never caught on any platform.)
    if (filename.contains("..")) {
        log.warn("File traversal attack !? Disallowing serveFile for illegal filename: " + filename);
        response.sendError(HttpServletResponse.SC_FORBIDDEN);
        return;
    }

    // the request always has '/'; convert to the platform separator.
    // String.replace(char, char) is a literal replacement (no regex), so no special
    // escaping is needed when the separator is '\' on Windows.
    filename = filename.replace('/', File.separatorChar);

    HttpSession session = request.getSession();
    String baseDir, filePath;
    Archive archive = JSPHelper.getArchive(request);
    if (archive != null) {
        // archive loaded: serve relative to its images subdir
        baseDir = archive.baseDir;
        filePath = baseDir + File.separator + Archive.IMAGES_SUBDIR + File.separator + filename;
    } else if (ModeConfig.isProcessingMode()) {
        baseDir = edu.stanford.muse.Config.REPO_DIR_PROCESSING;
        filePath = baseDir + File.separator + filename;
    } else if (ModeConfig.isDiscoveryMode()) {
        baseDir = edu.stanford.muse.Config.REPO_DIR_DISCOVERY;
        filePath = baseDir + File.separator + filename;
    } else if (ModeConfig.isDeliveryMode()) {
        baseDir = edu.stanford.muse.Config.REPO_DIR_DELIVERY;
        filePath = baseDir + File.separator + filename;
    } else {
        // appraisal mode: filename already carries the 'user' component, so it is not added here
        baseDir = Config.REPO_DIR_APPRAISAL;
        filePath = baseDir + File.separator + filename;
    }
    // could check if user is authorized here... or get the userKey directly from session
    JSPHelper.writeFileToResponse(session, response, filePath, true);
}
Use of edu.stanford.muse.index.Archive in the ePADD project: class CorrespondentAuthorityMapper, method getCorrespondentAuthorityInfo.
/**
 * Returns an AuthorityInfo object representing the info needed by the front-end.
 * Use only for rendering the authorities table.
 *
 * @param archiveID id used to look up the archive and build the browse URL
 * @param ab        address book used to resolve the name to contacts and message counts
 * @param name      correspondent name as displayed; canonicalized internally for lookups
 * @return populated AuthorityInfo; if the name is not in the address book, only
 *         errorMessage (and the basic fields) are set
 */
public AuthorityInfo getCorrespondentAuthorityInfo(String archiveID, AddressBook ab, String name) throws IOException, ParseException {
    String cname = canonicalize(name);
    AuthorityInfo result = new AuthorityInfo();
    result.isConfirmed = false;
    result.name = name;

    // lazily (re)build the per-correspondent message counts if absent or empty.
    // (historically this map was read from candidateCorrespondentAuthorities.csv and
    // confirmedCorrespondentAuthorities.csv)
    if (cnameToCount == null || cnameToCount.size() == 0) {
        if (cnameToCount == null)
            cnameToCount = new java.util.LinkedHashMap<>(); // guard: the original code NPE'd on put() when this field was null
        Archive archive = SimpleSessions.getArchiveForArchiveID(archiveID);
        List<Pair<Contact, Integer>> pairs = ab.sortedContactsAndCounts((Collection) archive.getAllDocs());
        for (Pair<Contact, Integer> p : pairs) {
            Contact c = p.getFirst();
            String lname = c.pickBestName();
            String lcname = canonicalize(lname);
            if (Util.nullOrEmpty(lcname))
                continue;
            cnameToCount.put(lcname, p.getSecond());
        }
    }

    // single assignment of the message count (the original assigned result.nMessages twice)
    Integer nMessages = cnameToCount.get(cname);
    result.nMessages = (nMessages == null) ? 0 : nMessages;

    Collection<Contact> contacts = ab.lookupByName(name);
    if (contacts == null) {
        result.errorMessage = "Name not in address book: " + name;
        return result;
    }
    // build tooltip with a StringBuilder instead of String += in a loop
    StringBuilder tooltip = new StringBuilder();
    for (Contact contact : contacts)
        tooltip.append(contact.toTooltip());

    result.url = "browse?archiveID=" + archiveID + "&adv-search=on&correspondentTo=on&correspondentFrom=on&correspondentCc=on&correspondentBcc=on&correspondent=" + name;
    result.tooltip = tooltip.toString();

    // if this correspondent already has a confirmed authority record, surface it
    AuthorityMapper.AuthorityRecord authRecord = cnameToAuthority.get(cname);
    if (authRecord != null) {
        result.isConfirmed = true;
        result.confirmedAuthority = authRecord;
    }

    // candidate FAST ids, resolved to full authority records for display
    List<AuthorityMapper.AuthorityRecord> candidates = new ArrayList<>();
    Collection<Long> fastIds = cnameToFastIdCandidates.get(cname);
    if (fastIds != null)
        for (Long id : fastIds) candidates.add(getAuthRecordForFASTId(id));
    result.candidates = candidates;
    return result;
}
Use of edu.stanford.muse.index.Archive in the ePADD project: class SimpleSessions, method readArchiveIfPresent.
/**
 * VIP method. Should be the single place to load an archive from disk.
 * Loads an archive from the given directory; always re-uses archive objects loaded from
 * the same directory. This is fine when:
 * - running single-user
 * - running discovery mode epadd, since a single archive should be loaded only once.
 * - even in a hosted mode with different archives in simultaneous play, where different
 *   people have their own userKeys and therefore different dirs.
 * It may NOT be fine if multiple people are operating on their different copies of an
 * archive loaded from the same place; don't see a use-case for this right now.
 *
 * @param baseDir archive base directory (session file expected under its sessions subdir)
 * @return the archive, or null if it doesn't exist or the session file lacks an archive
 * @throws RuntimeException wrapping any error that occurs while reading the session
 */
public static Archive readArchiveIfPresent(String baseDir) throws IOException {
    String archiveFile = baseDir + File.separator + Archive.SESSIONS_SUBDIR + File.separator + "default" + SimpleSessions.SESSION_SUFFIX;
    if (!new File(archiveFile).exists()) {
        return null;
    }
    // (removed unused local pmFile: the collection-metadata path was computed but never read)
    try {
        // contention is not a concern right now. If it does become one, locking a small per-dir
        // object like archiveFile.intern(), along with a ConcurrentHashMap, might handle it.
        synchronized (globaldirToArchiveMap) {
            // the archive is wrapped inside a weak ref to allow the archive object to be collected
            // if there are no references to it (usually the references are in the user sessions).
            WeakReference<Archive> wra = getArchiveFromGlobalArchiveMap(baseDir);
            if (wra != null) {
                Archive a = wra.get();
                if (a != null) {
                    log.info("Great, could re-use loaded archive for dir: " + archiveFile + "; archive = " + a);
                    return a;
                }
            }
            log.info("Archive not already loaded, reading from dir: " + archiveFile);
            Map<String, Object> map = loadSessionAsMap(archiveFile, baseDir, true);
            // read the session map, but only use archive
            Archive a = (Archive) map.get("archive");
            // could do more health checks on archive here
            if (a == null) {
                log.warn("Archive key is not present in archive file! The archive must be corrupted! directory:" + archiveFile);
                return null;
            }
            a.setBaseDir(baseDir);
            // no need to read archive authorized authorities, they will be loaded on demand from the legacy authorities.ser file
            addToGlobalArchiveMap(baseDir, a);
            // check if the loaded archive satisfies the verification conditions
            JSPHelper.log.info("After reading the archive checking if it is in good shape");
            a.Verify();
            return a;
        }
    } catch (Exception e) {
        Util.print_exception("Error reading archive from dir: " + archiveFile, e, log);
        throw new RuntimeException(e);
    }
}
Use of edu.stanford.muse.index.Archive in the ePADD project: class SimpleSessions, method prepareAndLoadArchive.
/**
 * Returns the archive for this request, re-using one already on disk if present, or
 * creating a fresh one otherwise.
 * <p>
 * In processing mode the archive dir gets a random hex prefix under REPO_DIR_PROCESSING;
 * otherwise it lives under CACHE_BASE_DIR keyed by the (currently fixed) user key.
 *
 * @param m       email fetcher (currently unused here; historically supplied the user key in server mode)
 * @param request used when a brand-new archive must be prepared
 * @return the loaded or freshly-created archive, never null
 * @throws IOException if reading/creating the archive fails
 */
public static Archive prepareAndLoadArchive(MuseEmailFetcher m, HttpServletRequest request) throws IOException {
    // here's where we create a fresh archive.
    // NOTE(review): in server mode the user key used to come from the session or
    // m.getEffectiveUserKey(); single-user mode hardcodes it.
    String userKey = "user";

    // ThreadLocalRandom avoids constructing a new Random per call (and is contention-free)
    int i = java.util.concurrent.ThreadLocalRandom.current().nextInt();
    String randomPrefix = String.format("%08x", i);
    String archiveDir = ModeConfig.isProcessingMode() ? Config.REPO_DIR_PROCESSING + File.separator + randomPrefix : SimpleSessions.CACHE_BASE_DIR + File.separator + userKey;

    Archive archive = SimpleSessions.readArchiveIfPresent(archiveDir);
    if (archive != null) {
        JSPHelper.log.info("Good, existing archive found");
    } else {
        JSPHelper.log.info("Creating a new archive in " + archiveDir);
        archive = JSPHelper.preparedArchive(request, archiveDir, new ArrayList<>());
        // by this time the archive is created;
        // add this to global maps archiveID->archive, archive->archiveID
        addToGlobalArchiveMap(archiveDir, archive);
    }
    // touch the default "general" lexicon so it is loaded; the returned object is
    // intentionally unused here (it used to be stashed in the session for the facets UI)
    archive.getLexicon("general");
    return archive;
}
Use of edu.stanford.muse.index.Archive in the ePADD project: class CrossCollectionSearch, method initialize.
/**
 * initializes lookup structures (entity infos and ctokenToInfos) for cross collection search.
 * reads all archives available in the base dir.
 * should be synchronized so there's no chance of doing it multiple times at the same time.
 *
 * @param baseDir directory whose subdirectories are scanned for archives (each must
 *                contain a default session file to be considered an archive)
 */
private static synchronized void initialize(String baseDir) {
    // this is created only once in one run. if it has already been created, reuse it.
    // in the future, this may be read from a serialized file, etc.
    cTokenToInfos = LinkedHashMultimap.create();
    File[] files = new File(baseDir).listFiles();
    if (files == null) {
        // listFiles returns null when baseDir doesn't exist or isn't a directory
        log.warn("Trying to initialize cross collection search from an invalid directory: " + baseDir);
        return;
    }
    int archiveNum = 0;
    for (File f : files) {
        if (!f.isDirectory())
            continue;
        try {
            // a subdir is treated as an archive only if its default session file exists
            String archiveFile = f.getAbsolutePath() + File.separator + Archive.SESSIONS_SUBDIR + File.separator + "default" + SimpleSessions.getSessionSuffix();
            if (!new File(archiveFile).exists())
                continue;
            Archive archive = SimpleSessions.readArchiveIfPresent(f.getAbsolutePath());
            if (archive == null) {
                // best-effort: a bad archive dir shouldn't stop the scan of the others
                log.warn("failed to read archive from " + f.getAbsolutePath());
                continue;
            }
            log.info("Loaded archive from " + f.getAbsolutePath());
            log.info("Loaded archive metadata from " + f.getAbsolutePath());
            // process all docs in this archive to set up centityToInfo map
            String archiveID = SimpleSessions.getArchiveIDForArchive(archive);
            // maps canonicalized entity -> aggregated info (count, dates, correspondent flag)
            Map<String, EntityInfo> centityToInfo = new LinkedHashMap<>();
            {
                AddressBook ab = archive.addressBook;
                for (Document d : archive.getAllDocs()) {
                    EmailDocument ed = (EmailDocument) d;
                    // compute centities, the set of all canonicalized entities in this doc.
                    // see spec in prodpad #140
                    // first come correspondents, then subject entities, then body.
                    // for correspondents we incl. all forms of their email addr or contact name)
                    // it should be a set because we want to count every string only once per message
                    Set<String> entities, correspondentEntities;
                    {
                        entities = new LinkedHashSet<>();
                        Set<Contact> contacts = ed.getParticipatingContacts(ab);
                        for (Contact c : contacts) {
                            if (c.getNames() != null)
                                entities.addAll(c.getNames());
                            if (c.getEmails() != null)
                                entities.addAll(c.getEmails());
                        }
                        // keep track of the correspondent centities also, separately because we need the isCorrespondent flag
                        correspondentEntities = new LinkedHashSet<>(entities);
                        Set<String> set = archive.getEntitiesInDoc(ed);
                        if (!Util.nullOrEmpty(set))
                            entities.addAll(set);
                        // filter out any null or empty strings (just in case)
                        // don't canonicalize right away because we need to keep the original form of the name
                        entities = entities.stream().filter(s -> !Util.nullOrEmpty(s)).collect(Collectors.toSet());
                    }
                    // convert the correspondent entities to c entities
                    Set<String> correspondentCEntities = correspondentEntities.stream().map(CrossCollectionSearch::canonicalize).collect(Collectors.toSet());
                    for (String entity : entities) {
                        String centity = canonicalize(entity);
                        EntityInfo ei = centityToInfo.get(centity);
                        if (ei == null) {
                            // first sighting of this canonical entity in this archive:
                            // displayName keeps the original (un-canonicalized) form
                            ei = new EntityInfo();
                            ei.archiveID = archiveID;
                            ei.displayName = entity;
                            centityToInfo.put(centity, ei);
                        }
                        // the isCorrespondent flag is 1-way, i.e. once it is set, it will not be unset.
                        if (correspondentCEntities.contains(centity)) {
                            ei.isCorrespondent = true;
                        }
                        // update the first/last dates if needed
                        if (ei.firstDate == null || ei.firstDate.after(ed.date)) {
                            ei.firstDate = ed.date;
                        }
                        if (ei.lastDate == null || ei.lastDate.before(ed.date)) {
                            ei.lastDate = ed.date;
                        }
                        ei.count++;
                    }
                }
            }
            log.info("Archive # " + archiveNum + " read " + centityToInfo.size() + " entities");
            // now set up this map as a token map: each token of a canonical entity points to its EntityInfo
            for (EntityInfo ei : centityToInfo.values()) {
                String entity = ei.displayName;
                String centity = canonicalize(entity);
                allCEntities.add(centity);
                // consider a set of tokens because we don't want repeats
                Set<String> ctokens = new LinkedHashSet<>(Util.tokenize(centity));
                for (String ctoken : ctokens) cTokenToInfos.put(ctoken, ei);
            }
        } catch (Exception e) {
            // best-effort per archive: log and keep scanning the remaining dirs
            Util.print_exception("Error loading archive in directory " + f.getAbsolutePath(), e, log);
        }
        archiveNum++;
    }
}
Aggregations