
Example 1 with StandardSupportedAlgorithms

Use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project bagit-java by LibraryOfCongress.

The class BagVerifierTest, method testStandardSupportedAlgorithms:

@Test
public void testStandardSupportedAlgorithms() throws Exception {
    // the standard algorithm names as they appear in manifest file names
    List<String> algorithms = Arrays.asList("md5", "sha1", "sha256", "sha512");
    for (String alg : algorithms) {
        // enum constants are uppercase (MD5, SHA1, ...) while manifest names are lowercase
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.valueOf(alg.toUpperCase());
        Manifest manifest = new Manifest(algorithm);
        // sut is the BagVerifier under test: each standard algorithm must be usable for hashing
        sut.checkHashes(manifest);
    }
}
Also used: StandardSupportedAlgorithms (gov.loc.repository.bagit.hash.StandardSupportedAlgorithms), Manifest (gov.loc.repository.bagit.domain.Manifest), Test (org.junit.Test)
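
For context, each StandardSupportedAlgorithms constant pairs the lowercase manifest name with the JCA MessageDigest name that backs it. A minimal sketch of that mapping, assuming the getBagitName() and getMessageDigestName() accessors of bagit-java's SupportedAlgorithm interface:

import java.security.MessageDigest;
import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;

public class AlgorithmMappingSketch {
    public static void main(String[] args) throws Exception {
        for (StandardSupportedAlgorithms algorithm : StandardSupportedAlgorithms.values()) {
            // getBagitName() is the name used in manifest files (e.g. "md5"),
            // getMessageDigestName() is the JCA name (e.g. "MD5", "SHA-256")
            MessageDigest digest = MessageDigest.getInstance(algorithm.getMessageDigestName());
            System.out.println(algorithm.getBagitName() + " -> " + digest.getAlgorithm());
        }
    }
}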

Example 2 with StandardSupportedAlgorithms

Use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project epadd by ePADD.

The class ArchiveReaderWriter, method saveArchive:

/**
 * Saves the archive in the current session to the cache dir. Note: no blobs are saved.
 */
/* The mode attribute selects whether this archive was already part of a bag or is a first-time creation. Based on this flag the output directory
   changes: in case of an incremental bag update the files will be in the basedir/data/ subfolder, whereas in case of fresh creation the files will
   be directly in basedir.
 */
public static boolean saveArchive(String baseDir, String name, Archive archive, Archive.Save_Archive_Mode mode) throws IOException {
    /*log.info("Before saving the archive checking if it is still in good shape");
        archive.Verify();*/
    String dir = baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER + File.separatorChar + Archive.SESSIONS_SUBDIR;
    // just to be safe
    new File(dir).mkdirs();
    String filename = dir + File.separatorChar + name + SESSION_SUFFIX;
    log.info("Saving archive to (session) file " + filename);
    /*//file path names of addressbook, entitybook and correspondentAuthorityMapper data.
        String addressBookPath = dir + File.separatorChar + Archive.ADDRESSBOOK_SUFFIX;
        String entityBookPath = dir + File.separatorChar + Archive.ENTITYBOOK_SUFFIX;
        String cAuthorityPath =  dir + File.separatorChar + Archive.CAUTHORITYMAPPER_SUFFIX;
        */
    recalculateCollectionMetadata(archive);
    try (ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(filename))))) {
        oos.writeObject("archive");
        oos.writeObject(archive);
    } catch (Exception e1) {
        // the failure is logged but not rethrown; the method still returns true
        Util.print_exception("Failed to write archive: ", e1, log);
    }
    // The archive object doesn't get modified, so there is no point in saving it again for an incremental update:
    // if (mode == Archive.Save_Archive_Mode.INCREMENTAL_UPDATE)
    //     archive.updateFileInBag(filename, archive.baseDir);
    // ///////////////AddressBook Writing -- In human readable form ///////////////////////////////////
    if (mode != Archive.Save_Archive_Mode.INCREMENTAL_UPDATE)
        // no need to save the addressbook when saving an archive in incremental mode, because the address book is saved after every explicit modification.
        saveAddressBook(archive, mode);
    // //////////////EntityBook Writing -- In human readable form/////////////////////////////////////
    saveEntityBookManager(archive, mode);
    // /////////////CAuthorityMapper Writing-- Serialized///////////////////////////////
    saveCorrespondentAuthorityMapper(archive, mode);
    // ////////////LabelManager Writing -- Serialized//////////////////////////////////
    saveLabelManager(archive, mode);
    // ////////////AnnotationManager writing-- In human readable form/////////////////////////////////////
    saveAnnotations(archive, mode);
    saveCollectionMetadata(archive, mode);
    // if the save mode is FRESH_CREATION, create a bag around baseDir and set that bag on this archive.
    if (mode == Archive.Save_Archive_Mode.FRESH_CREATION) {
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
        boolean includeHiddenFiles = false;
        try {
            archive.close();
            // First copy the content of archive.baseDir + "/data" to archive.baseDir and then create an in-place bag.
            // Why so complicated? Because we want a uniform archive directory structure, 'archive.baseDir + "/data"',
            // regardless of whether the archive is inside a bag or not.
            File tmp = Util.createTempDirectory();
            tmp.delete();
            // If the indexer keeps file handles open, moving the directory fails on Windows because of the locks held on those files;
            // hence the archive.close() call above, before anything is moved around.
            FileUtils.moveDirectory(Paths.get(archive.baseDir + File.separatorChar + Archive.BAG_DATA_FOLDER).toFile(), tmp.toPath().toFile());
            // Files.copy(Paths.get(userDir+File.separatorChar+Archive.BAG_DATA_FOLDER),tmp.toPath(),StandardCopyOption.REPLACE_EXISTING);
            File wheretocopy = Paths.get(archive.baseDir).toFile();
            Util.deleteDir(wheretocopy.getPath(), log);
            FileUtils.moveDirectory(tmp.toPath().toFile(), wheretocopy);
            Bag bag = BagCreator.bagInPlace(Paths.get(archive.baseDir), Arrays.asList(algorithm), includeHiddenFiles);
            archive.openForRead();
            archive.setArchiveBag(bag);
        } catch (NoSuchAlgorithmException e) {
            // should not happen: every JRE is required to support MD5
            e.printStackTrace();
        }
    } else {
        archive.close();
        // re-open for reading
        archive.openForRead();
    }
    return true;
}
Also used: StandardSupportedAlgorithms (gov.loc.repository.bagit.hash.StandardSupportedAlgorithms), GZIPOutputStream (java.util.zip.GZIPOutputStream), Bag (gov.loc.repository.bagit.domain.Bag), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException), ParseException (org.apache.lucene.queryparser.classic.ParseException), LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException), CorruptIndexException (org.apache.lucene.index.CorruptIndexException)
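
Once bagInPlace has returned, the resulting bag can be read back and verified before reopening the archive. A minimal sketch of that round trip, assuming bagit-java's BagReader and BagVerifier; the directory path is hypothetical:

import java.nio.file.Path;
import java.nio.file.Paths;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.reader.BagReader;
import gov.loc.repository.bagit.verify.BagVerifier;

public class VerifyBagSketch {
    public static void main(String[] args) throws Exception {
        Path bagDir = Paths.get("/path/to/archive/baseDir"); // hypothetical in-place bag location
        Bag bag = new BagReader().read(bagDir); // parses bagit.txt, manifests and tag files
        // throws an exception if a payload file is missing, unexpected, or fails its checksum
        new BagVerifier().isValid(bag, /* ignoreHiddenFiles = */ true);
        System.out.println("bag at " + bagDir + " is valid");
    }
}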

Example 3 with StandardSupportedAlgorithms

Use of gov.loc.repository.bagit.hash.StandardSupportedAlgorithms in project epadd by ePADD.

The class Archive, method main:

/* (Two long commented-out helpers, getEntitiesCountAsJSON and getEntitiesInfo, precede main in the source;
   they built a JSON array of per-entity occurrence counts and are omitted here for brevity.) */
public static void main(String[] args) {
    try {
        String userDir = "/Users/tech/" + File.separator + "epadd-appraisal" + File.separator + "user/data/data" + File.separator + "blobs";
        // String userDir = System.getProperty("user.home") + File.separator + "epadd-appraisal" + File.separator + "user";
        // Bag b = Archive.readArchiveBag(userDir);
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
        BagCreator.bagInPlace(Paths.get(userDir), Arrays.asList(algorithm), false);
        File tmp = Util.createTempDirectory();
        tmp.delete();
        FileUtils.moveDirectory(Paths.get(userDir + File.separatorChar + Archive.BAG_DATA_FOLDER).toFile(), tmp.toPath().toFile());
        // Files.copy(Paths.get(userDir+File.separatorChar+Archive.BAG_DATA_FOLDER),tmp.toPath(),StandardCopyOption.REPLACE_EXISTING);
        File wheretocopy = Paths.get(userDir).toFile();
        wheretocopy.delete();
        FileUtils.moveDirectory(tmp.toPath().toFile(), wheretocopy);
        // Files.move(,Paths.get(userDir),StandardCopyOption.REPLACE_EXISTING);
        boolean includeHiddenFiles = false;
        // BagCreator.
        Bag bag = BagCreator.bagInPlace(Paths.get(userDir), Arrays.asList(algorithm), includeHiddenFiles);
        // write the bag to disk, in place of userDir:
        // Path outputDir = Paths.get(userDir + "/bag");
        // BagWriter.write(bag, outputDir); // where bag is a Bag object
        /* // System.out.println(bag.getRootDir().toString());
           Bag b = Archive.readArchiveBag(userDir);
           BagWriter.write(b, Paths.get(userDir)); // update it in place
           // b.getPayLoadManifests().forEach(f -> f.getFileToChecksumMap().pu);
           b = Archive.readArchiveBag(userDir);
           BagWriter.write(b, Paths.get(userDir));
         */
        // System.out.println(b.getRootDir().toString());
        Archive archive = ArchiveReaderWriter.readArchiveIfPresent(userDir);
        // make some modification to a file (e.g. the label CSV), then update that file in the bag:
        archive.updateFileInBag((userDir + File.separatorChar + "data/sessions" + File.separatorChar + "collection-metadata.json"), userDir);
        // then try to read it again..
        archive = ArchiveReaderWriter.readArchiveIfPresent(userDir);
        List<Document> docs = archive.getAllDocs();
        int i = 0;
        archive.assignThreadIds();
        NER.NERStats stats = new NER.NERStats();
        for (Document doc : docs) {
            EmailDocument ed = (EmailDocument) doc;
            stats.update(archive.getAllNamesInDoc(ed, true));
            System.out.println(Arrays.asList(archive.getAllNamesInDoc(ed, true)));
            if (i++ > 20)
                break;
        // (commented-out debug code omitted: it scanned each doc's thread via archive.docsWithThreadId(ed.threadID),
        //  counted how many docs in the thread were sent, and logged threads where the counts disagreed)
        }
        System.out.println(stats.counts);
        System.out.println(stats.all);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used: StandardSupportedAlgorithms (gov.loc.repository.bagit.hash.StandardSupportedAlgorithms), ParseException (org.apache.lucene.queryparser.classic.ParseException), NoSuchAlgorithmException (java.security.NoSuchAlgorithmException), NER (edu.stanford.muse.ner.NER)
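
The NoSuchAlgorithmException seen in both ePADD call sites comes from the JCA lookup behind each algorithm constant. A minimal sketch of hashing a single file the way a payload manifest entry is produced, assuming getMessageDigestName() and getBagitName() from bagit-java; the file name is hypothetical:

import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.MessageDigest;
import gov.loc.repository.bagit.hash.StandardSupportedAlgorithms;

public class PayloadChecksumSketch {
    public static void main(String[] args) throws Exception {
        StandardSupportedAlgorithms algorithm = StandardSupportedAlgorithms.MD5;
        // MessageDigest.getInstance throws NoSuchAlgorithmException for an unknown JCA name
        MessageDigest digest = MessageDigest.getInstance(algorithm.getMessageDigestName());
        byte[] hash = digest.digest(Files.readAllBytes(Paths.get("bagit.txt"))); // hypothetical file
        StringBuilder hex = new StringBuilder();
        for (byte b : hash)
            hex.append(String.format("%02x", b));
        // a manifest-md5.txt line is "<checksum> <relative path>"
        System.out.println(hex + " " + "bagit.txt");
    }
}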

Aggregations

StandardSupportedAlgorithms (gov.loc.repository.bagit.hash.StandardSupportedAlgorithms): 3
NoSuchAlgorithmException (java.security.NoSuchAlgorithmException): 2
ParseException (org.apache.lucene.queryparser.classic.ParseException): 2
NER (edu.stanford.muse.ner.NER): 1
Bag (gov.loc.repository.bagit.domain.Bag): 1
Manifest (gov.loc.repository.bagit.domain.Manifest): 1
GZIPOutputStream (java.util.zip.GZIPOutputStream): 1
CorruptIndexException (org.apache.lucene.index.CorruptIndexException): 1
LockObtainFailedException (org.apache.lucene.store.LockObtainFailedException): 1
Test (org.junit.Test): 1