use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project bagit-java by LibraryOfCongress.
the class BagVerifierTest method testStandardSupportedAlgorithms.
@Test
public void testStandardSupportedAlgorithms() throws Exception {
    List<String> algorithms = Arrays.asList("md5", "sha1", "sha256", "sha512");
    for (String alg : algorithms) {
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.valueOf(alg.toUpperCase());
        Manifest manifest = new Manifest(algorithm);
        sut.checkHashes(manifest);
    }
}
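The test above only exercises the lookup from a lower-case algorithm name to the corresponding enum constant. A minimal standalone sketch of that same resolution, assuming nothing beyond the enum itself (the class name and main method below are illustrative, not part of bagit-java):

import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;

public class AlgorithmLookup {
    // Resolve a lower-case name, as it appears in a manifest file name
    // such as manifest-md5.txt, to the enum constant.
    static StandardSupportedAlgorithms fromManifestName(String name) {
        return StandardSupportedAlgorithms.valueOf(name.toUpperCase());
    }

    public static void main(String[] args) {
        for (String alg : new String[] { "md5", "sha1", "sha256", "sha512" }) {
            System.out.println(alg + " -> " + fromManifestName(alg));
        }
    }
}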
use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project epadd by ePADD.
the class ArchiveReaderWriter method saveArchive.
/**
 * Saves the archive in the current session to the cache dir. Note: no blobs are saved.
 */
/* The mode argument indicates whether this archive was already part of a bag or is being created for the first time.
   Based on this flag the output directory changes: for an incremental bag update the files go in the baseDir/data/ subfolder,
   whereas for a fresh creation the files go directly in baseDir.
*/
public static boolean saveArchive(String baseDir, String name, Archive archive, Archive.Save_Archive_Mode mode) throws IOException {
    /*log.info("Before saving the archive checking if it is still in good shape");
    archive.Verify();*/
    String dir = baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR;
    // just to be safe
    new File(dir).mkdirs();
    String filename = dir + File.separatorChar + name + SESSION_SUFFIX;
    log.info("Saving archive to (session) file " + filename);
    /*// file path names of addressbook, entitybook and correspondentAuthorityMapper data.
    String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
    String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
    String cAuthorityPath = dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
    */
    recalculateCollectionMetadata(archive);
    try (ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(filename))))) {
        oos.writeObject("archive");
        oos.writeObject(archive);
    } catch (Exception e1) {
        Util.print_exception("Failed to write archive: ", e1, log);
    }
    if (mode == Archive.Save_Archive_Mode.INCREMENTAL_UPDATE) {
        // the archive object doesn't get modified, so there is no point in saving it again for an incremental update.
        // archive.updateFileInBag(filename, archive.baseDir);
    }
    // /////////////// AddressBook writing -- in human-readable form ///////////////////////////////////
    if (mode != Archive.Save_Archive_Mode.INCREMENTAL_UPDATE)
        // no need to save the address book when saving an archive in incremental mode, because the address book is saved after every explicit modification.
        saveAddressBook(archive, mode);
    // ////////////// EntityBook writing -- in human-readable form /////////////////////////////////////
    saveEntityBookManager(archive, mode);
    // ///////////// CAuthorityMapper writing -- serialized ///////////////////////////////
    saveCorrespondentAuthorityMapper(archive, mode);
    // //////////// LabelManager writing -- serialized //////////////////////////////////
    saveLabelManager(archive, mode);
    // //////////// AnnotationManager writing -- in human-readable form /////////////////////////////////////
    saveAnnotations(archive, mode);
    saveCollectionMetadata(archive, mode);
    // if the save mode is FRESH_CREATION, create a bag around baseDir and set it as this archive's bag.
    if (mode == Archive.Save_Archive_Mode.FRESH_CREATION) {
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
        boolean includeHiddenFiles = false;
        try {
            archive.close();
            // First copy the content of archive.baseDir + "/data" to archive.baseDir and then create an in-place bag.
            // Why so complicated? Because we want a uniform directory structure for the archive regardless of whether it is
            // in a bag or not. That structure is the 'archive.baseDir + "/data"' folder.
            File tmp = Util.createTempDirectory();
            tmp.delete();
            // If the indexer keeps file handles open, the directory move fails on Windows because of the locks held on those files;
            // therefore call archive.close() before moving anything around.
            // archive.close();
            FileUtils.moveDirectory(Paths.get(archive.baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER).toFile(), tmp.toPath().toFile());
            // Files.copy(Paths.get(userDir+File.separatorChar+Archive.BAG_DATA_FOLDER),tmp.toPath(),StandardCopyOption.REPLACE_EXISTING);
            File wheretocopy = Paths.get(archive.baseDir).toFile();
            Util.deleteDir(wheretocopy.getPath(), log);
            FileUtils.moveDirectory(tmp.toPath().toFile(), wheretocopy);
            Bag bag = BagCreator.bagInPlace(Paths.get(archive.baseDir), Arrays.asList(algorithm), includeHiddenFiles);
            archive.openForRead();
            archive.setArchiveBag(bag);
        } catch (NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    } else {
        archive.close();
        // re-open for reading
        archive.openForRead();
    }
    return true;
}
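For orientation, the FRESH_CREATION branch above leaves archive.baseDir laid out as a BagIt bag: the files that BagCreator.bagInPlace finds under baseDir are moved into a data/ payload directory, and tag files such as bagit.txt and manifest-md5.txt are written at the top level. A small sketch that checks for that layout after the save, assuming only the standard BagIt file names (the base path is a placeholder):

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class BagLayoutCheck {
    public static void main(String[] args) {
        // Placeholder: pass the archive's baseDir as the first argument.
        Path base = Paths.get(args[0]);
        for (String entry : new String[] { "bagit.txt", "manifest-md5.txt", "data" }) {
            System.out.println(entry + " present: " + Files.exists(base.resolve(entry)));
        }
    }
}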
use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project epadd by ePADD.
the class Archive method main.
/*public JSONArray getEntitiesCountAsJSON(Short entityType, int maxrecords) {
    Map<String, Integer> counts = new LinkedHashMap<>();
    Map<String, String> canonicalToOriginal = new LinkedHashMap<>();
    double cutoff = 0.001;
    getEntitiesInfo(entityType, counts, canonicalToOriginal, cutoff);
    List<Pair<String, Integer>> pairs = Util.sortMapByValue(counts);
    int count = 0;
    JSONArray resultArray = new JSONArray();
    for (Pair<String, Integer> p : pairs) {
        if (++count > maxrecords)
            break;
        String entity = p.getFirst();
        String entityToPrint = canonicalToOriginal.get(entity);
        JSONArray j = new JSONArray();
        j.put(0, Util.escapeHTML(entityToPrint));
        j.put(1, counts.get(entity));
        resultArray.put(count - 1, j);
    }
    return resultArray;
}

private void getEntitiesInfo(Short entitiyType, Map<String, Integer> counts, Map<String, String> canonicalToOriginal, double cutoff) {
    Collection<EmailDocument> docs = (Collection) getAllDocs();
    for (EmailDocument ed : docs) {
        Span[] es = getEntitiesInDoc(ed, true);
        List<Span> est = new ArrayList<>();
        for (Span e : es)
            if (NEType.getCoarseType(e.type).getCode() == entitiyType)
                est.add(e);
        Span[] fes = edu.stanford.muse.ie.Util.filterEntitiesByScore(est.toArray(new Span[est.size()]), cutoff);
        // filter the entities to remove obvious junk
        fes = edu.stanford.muse.ie.Util.filterEntities(fes);
        // note that entities could have repetitions,
        // so we create a *set* of entities, but after canonicalization.
        // canonicalToOriginal just uses an arbitrary (first) occurrence of the entity
        Set<String> canonicalEntities = new LinkedHashSet<String>();
        for (Span esp : fes) {
            String e = esp.getText();
            String canonicalEntity = IndexUtils.canonicalizeEntity(e);
            if (canonicalToOriginal.get(canonicalEntity) == null)
                canonicalToOriginal.put(canonicalEntity, e);
            canonicalEntities.add(canonicalEntity);
        }
        for (String ce : canonicalEntities) {
            Integer I = counts.get(ce);
            counts.put(ce, (I == null) ? 1 : I + 1);
        }
    }
}
*/
public static void main(String[] args) {
    try {
        String userDir = "/Users/tech/" + File.separator + "epadd-appraisal" + File.separator + "user/data/data" + File.separator + "blobs";
        // String userDir = System.getProperty("user.home") + File.separator + "epadd-appraisal" + File.separator + "user";
        // Bag b = Archive.readArchiveBag(userDir);
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
        BagCreator.bagInPlace(Paths.get(userDir), Arrays.asList(algorithm), false);
        File tmp = Util.createTempDirectory();
        tmp.delete();
        FileUtils.moveDirectory(Paths.get(userDir + File.separatorChar + Archive.BAG_DATA_FOLDER).toFile(), tmp.toPath().toFile());
        // Files.copy(Paths.get(userDir+File.separatorChar+Archive.BAG_DATA_FOLDER),tmp.toPath(),StandardCopyOption.REPLACE_EXISTING);
        File wheretocopy = Paths.get(userDir).toFile();
        wheretocopy.delete();
        FileUtils.moveDirectory(tmp.toPath().toFile(), wheretocopy);
        // Files.move(,Paths.get(userDir),StandardCopyOption.REPLACE_EXISTING);
        boolean includeHiddenFiles = false;
        // BagCreator.
        Bag bag = BagCreator.bagInPlace(Paths.get(userDir), Arrays.asList(algorithm), includeHiddenFiles);
        // write bag to disk.. in place of userDir..
        // Path outputDir = Paths.get(userDir + "/bag");
        //
        // BagWriter.write(bag, outputDir); // where bag is a Bag object
        /* // System.out.println(bag.getRootDir().toString());
        Bag b = Archive.readArchiveBag(userDir);
        BagWriter.write(b, Paths.get(userDir));
        // update it in place.
        // b.getPayLoadManifests().forEach(f->f.getFileToChecksumMap().pu);
        b = Archive.readArchiveBag(userDir);
        BagWriter.write(b, Paths.get(userDir));
        */
        // System.out.println(b.getRootDir().toString());
        Archive archive = ArchiveReaderWriter.readArchiveIfPresent(userDir);
        // make some modification in the file labelcsv.
        // update the file.
        archive.updateFileInBag((userDir + File.separatorChar + "data/sessions" + File.separatorChar + "collection-metadata.json"), userDir);
        // then try to read it again..
        archive = ArchiveReaderWriter.readArchiveIfPresent(userDir);
        List<Document> docs = archive.getAllDocs();
        int i = 0;
        archive.assignThreadIds();
        NER.NERStats stats = new NER.NERStats();
        for (Document doc : docs) {
            EmailDocument ed = (EmailDocument) doc;
            stats.update(archive.getAllNamesInDoc(ed, true));
            System.out.println(Arrays.asList(archive.getAllNamesInDoc(ed, true)));
            if (i++ > 20)
                break;
            // List<Document> threads = archive.docsWithThreadId(ed.threadID);
            // if (threads.size() > 0) {
            //     int numSent = 0;
            //     for (Document d : threads) {
            //         EmailDocument thread = (EmailDocument) d;
            //         int sent = thread.sentOrReceived(archive.addressBook) & EmailDocument.SENT_MASK;
            //         if (sent > 0)
            //             numSent++;
            //     }
            //     if (threads.size() != numSent || threads.size() > 2) {
            //         System.err.println("Found a thread with " + numSent + " sent and " + threads.size() + " docs in a thread: " + ed.getSubject());
            //         break;
            //     }
            //     if (i % 100 == 0)
            //         System.err.println("Scanned: " + i + " docs");
            // }
            // i++;
        }
        System.out.println(stats.counts);
        System.out.println(stats.all);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
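The experiment above rewrites collection-metadata.json inside the bag payload and then calls archive.updateFileInBag so the bag stays consistent; whatever that helper does internally, the entry for the file in manifest-md5.txt has to match its new checksum. A minimal sketch of recomputing that MD5 with plain JDK classes (the path argument is a placeholder, and this is not ePADD's actual helper):

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;

public class PayloadChecksum {
    public static void main(String[] args) throws Exception {
        // Placeholder path; in the code above it would be
        // <userDir>/data/sessions/collection-metadata.json
        Path payloadFile = Paths.get(args[0]);
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        byte[] digest = md5.digest(Files.readAllBytes(payloadFile));
        StringBuilder hex = new StringBuilder();
        for (byte b : digest) {
            hex.append(String.format("%02x", b));
        }
        // This is the value the corresponding manifest-md5.txt line must carry.
        System.out.println(hex + "  " + payloadFile);
    }
}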