Search in sources :

Example 1 with HashAssembler

use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.

the class FileArchiver method run.

/**
 * Assembles a SIP file at {@code sip} from the files handed over by {@code addFilesIn} for
 * {@code rootPath}, then prints metrics about the assembly to standard output.
 *
 * @param rootPath root against which relative paths are computed and which is passed to
 *        {@code addFilesIn}
 * @param sip location of the SIP file to write; it is canonicalized before use
 * @throws IOException if canonicalizing the SIP path or assembling the SIP fails
 * @throws IllegalStateException if the parent directory of the SIP file could not be created
 *         and does not already exist
 */
private void run(String rootPath, String sip) throws IOException {
    File sipFile = new File(sip).getCanonicalFile();
    File targetDir = sipFile.getParentFile();
    // mkdirs() returns false when the directory already exists, hence the extra isDirectory() check
    if (!(targetDir.mkdirs() || targetDir.isDirectory())) {
        throw new IllegalStateException("Could not create all required directories in path " + targetDir.getAbsolutePath());
    }
    System.out.printf("%nSample 2: Archiving files from %s into %s%n", rootPath, sipFile.getPath());

    // Packaging information: where and how the data is to be archived
    URI schemaUri = URI.create("urn:com.opentext.ia.sdk.sample.file:1.0");
    String entity = "file";
    PackagingInformation packagingPrototype = PackagingInformation.builder()
        .dss()
            .application("fileApplication")
            .holding("fileHolding")
            .producer("SIP SDK")
            .entity(entity)
            .schema(schemaUri.toString())
        .end()
        .build();

    // Mapping from a File to its PDI XML representation
    PdiAssembler<File> pdi = new XmlPdiAssembler<File>(schemaUri, entity) {

        @Override
        protected void doAdd(File file, Map<String, ContentInfo> contentInfo) {
            try {
                String relPath = relativePath(file, rootPath);
                // The content info is looked up by the same relative path that is used as
                // the digital object's reference in the content extraction below.
                getBuilder()
                    .element("path", relPath)
                    .element("size", Long.toString(file.length()))
                    .element("permissions", permissionsOf(file))
                    .element("contentType", Files.probeContentType(file.toPath()))
                    .elements("hashes", "hash", contentInfo.get(relPath).getContentHashes(), (contentHash, hashElement) -> {
                        hashElement
                            .attribute("algorithm", contentHash.getHashFunction())
                            .attribute("encoding", contentHash.getEncoding())
                            .attribute("value", contentHash.getValue());
                    });
            } catch (IOException e) {
                throw new RuntimeIoException(e);
            }
        }
    };

    // Every file contributes exactly one digital object, referenced by its relative path
    DigitalObjectsExtraction<File> contents = file -> {
        return Collections.singleton(DigitalObject.fromFile(relativePath(file, rootPath), file)).iterator();
    };
    HashAssembler contentHashes = new SingleHashAssembler(HashFunction.SHA256, Encoding.BASE64);

    // Assemble the SIP; end() runs even when adding files fails
    SipAssembler<File> sipAssembler = SipAssembler.forPdiAndContentWithContentHashing(
        packagingPrototype, pdi, contents, contentHashes);
    sipAssembler.start(new FileBuffer(sipFile));
    try {
        // NOTE(review): the SIP's own relative path is passed along, presumably so that
        // addFilesIn can skip it -- confirm against addFilesIn.
        addFilesIn(new File(rootPath), rootPath, relativePath(sipFile, rootPath), sipAssembler);
    } finally {
        sipAssembler.end();
    }

    // Report metrics about the assembly process
    SipMetrics sipMetrics = sipAssembler.getMetrics();
    System.out.printf("  Added %d files to SIP of %d bytes in %d ms%n",
        sipMetrics.numDigitalObjects(), sipMetrics.sipFileSize(), sipMetrics.assemblyTime());
}
Also used : XmlPdiAssembler(com.opentext.ia.sdk.sip.XmlPdiAssembler) DigitalObject(com.opentext.ia.sdk.sip.DigitalObject) Files(java.nio.file.Files) PdiAssembler(com.opentext.ia.sdk.sip.PdiAssembler) IOException(java.io.IOException) PackagingInformation(com.opentext.ia.sdk.sip.PackagingInformation) SipMetrics(com.opentext.ia.sdk.sip.SipMetrics) File(java.io.File) ContentInfo(com.opentext.ia.sdk.sip.ContentInfo) HashFunction(com.opentext.ia.sdk.support.io.HashFunction) SipAssembler(com.opentext.ia.sdk.sip.SipAssembler) RuntimeIoException(com.opentext.ia.sdk.support.io.RuntimeIoException) Map(java.util.Map) SingleHashAssembler(com.opentext.ia.sdk.support.io.SingleHashAssembler) DigitalObjectsExtraction(com.opentext.ia.sdk.sip.DigitalObjectsExtraction) FileBuffer(com.opentext.ia.sdk.support.io.FileBuffer) Encoding(com.opentext.ia.sdk.support.io.Encoding) URI(java.net.URI) Collections(java.util.Collections) HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler) SingleHashAssembler(com.opentext.ia.sdk.support.io.SingleHashAssembler) SingleHashAssembler(com.opentext.ia.sdk.support.io.SingleHashAssembler) HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler) FileBuffer(com.opentext.ia.sdk.support.io.FileBuffer) IOException(java.io.IOException) URI(java.net.URI) RuntimeIoException(com.opentext.ia.sdk.support.io.RuntimeIoException) XmlPdiAssembler(com.opentext.ia.sdk.sip.XmlPdiAssembler) File(java.io.File) Map(java.util.Map) PackagingInformation(com.opentext.ia.sdk.sip.PackagingInformation) SipMetrics(com.opentext.ia.sdk.sip.SipMetrics)

Example 2 with HashAssembler

use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.

the class ContentAssemblerDefault method contentHashFor.

/**
 * Produces the content hashes for the data read from the given stream.
 *
 * <p>The hash assembler obtained from {@code getContentHashAssembler()} is initialized, the
 * stream is copied to a null output stream with the assembler passed along to the copy, and
 * the assembler's result is returned. All three steps run while holding the assembler's
 * monitor.
 *
 * @param stream the stream to read
 * @return the hashes reported by the hash assembler after the copy
 * @throws IOException if the copy fails
 */
protected Collection<EncodedHash> contentHashFor(InputStream stream) throws IOException {
    final HashAssembler assembler = getContentHashAssembler();
    synchronized (assembler) {
        assembler.initialize();
        // The copied bytes themselves are discarded; only the assembler's result is kept.
        IOStreams.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM, BUFFER_SIZE, assembler);
        Collection<EncodedHash> contentHashes = assembler.get();
        return contentHashes;
    }
}
Also used : HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler)

Example 3 with HashAssembler

use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.

the class WhenAssemblingSips method shouldZipContentsAndReportMetrics.

/**
 * Assembles a SIP from two domain objects (contributing two and one digital objects
 * respectively) using mocked PDI and hash assemblers, then verifies the calls made to the PDI
 * assembler, the entries of the resulting ZIP and the reported {@link SipMetrics}.
 */
@Test
void shouldZipContentsAndReportMetrics() throws IOException {
    // PDI side: mocked assembler plus a hash assembler that reports one fixed hash and a random size
    Assembler<HashedContents<Object>> pdiAssembler = mock(Assembler.class);
    HashAssembler pdiHashAssembler = mock(HashAssembler.class);
    EncodedHash hash = someHash();
    when(pdiHashAssembler.get()).thenReturn(Collections.singletonList(hash));
    long pdiSize = randomInt(7, 128);
    when(pdiHashAssembler.numBytesHashed()).thenReturn(pdiSize);
    // Domain objects: object1 yields two digital objects (id1a, id1b), object2 yields one (id2)
    Object object1 = "object1_" + randomString(8);
    Object object2 = "object2_" + randomString(8);
    Collection<Object> objects = Arrays.asList(object1, object2);
    DigitalObjectsExtraction<Object> contentsExtraction = mock(DigitalObjectsExtraction.class);
    String id1a = randomString(8);
    String id1b = randomString(8);
    String id2 = randomString(8);
    List<? extends DigitalObject> digitalObjects1 = Arrays.asList(someContentDataObject(id1a), someContentDataObject(id1b));
    List<? extends DigitalObject> digitalObjects2 = Collections.singletonList(someContentDataObject(id2));
    // thenAnswer creates a fresh iterator on every invocation
    when(contentsExtraction.apply(object1)).thenAnswer(invocation -> digitalObjects1.iterator());
    when(contentsExtraction.apply(object2)).thenAnswer(invocation -> digitalObjects2.iterator());
    // Content side: successive get() calls hand out hashes1a, hashes1b, hashes2 in that order
    HashAssembler contentHashAssembler = mock(HashAssembler.class);
    Collection<EncodedHash> hashes1a = Collections.singletonList(someHash());
    Collection<EncodedHash> hashes1b = Collections.singletonList(someHash());
    Collection<EncodedHash> hashes2 = Collections.singletonList(someHash());
    Iterator<Collection<EncodedHash>> hashes = Arrays.asList(hashes1a, hashes1b, hashes2).iterator();
    when(contentHashAssembler.get()).thenAnswer(invocation -> hashes.next());
    // Every digital object is reported with the same random size
    long digitalObjectSize = randomInt(5, 255);
    when(contentHashAssembler.numBytesHashed()).thenReturn(digitalObjectSize);
    // Content info per domain object that the PDI assembler is expected to receive
    Map<String, ContentInfo> hashesById1 = new HashMap<>();
    hashesById1.put(id1a, new ContentInfo(id1a, hashes1a));
    hashesById1.put(id1b, new ContentInfo(id1b, hashes1b));
    Map<String, ContentInfo> hashesById2 = Collections.singletonMap(id2, new ContentInfo(id2, hashes2));
    PackagingInformation packagingInformationPrototype = somePackagingInformation();
    SipAssembler<Object> sipAssembler = SipAssembler.forPdiAndContentWithHashing(packagingInformationPrototype, pdiAssembler, pdiHashAssembler, contentsExtraction, contentHashAssembler);
    DataBuffer buffer = new MemoryBuffer();
    // Measure the wall-clock time of start/add/end for comparison with the reported assembly time
    long time = System.currentTimeMillis();
    sipAssembler.start(buffer);
    for (Object object : objects) {
        sipAssembler.add(object);
    }
    sipAssembler.end();
    time = System.currentTimeMillis() - time;
    // The PDI assembler must be started, receive each object with its content info, and be ended
    verify(pdiAssembler).start(any(DataBuffer.class));
    verify(pdiAssembler).add(eq(new HashedContents<>(object1, hashesById1)));
    verify(pdiAssembler).add(eq(new HashedContents<>(object2, hashesById2)));
    verify(pdiAssembler).end();
    // Check the ZIP entries in order; nothing may follow the packaging information
    try (ZipInputStream zip = new ZipInputStream(buffer.openForReading())) {
        assertContentDataObject(zip, id1a);
        assertContentDataObject(zip, id1b);
        assertContentDataObject(zip, id2);
        assertPreservationDescriptionInformation(zip);
        assertPackagingInformation(zip, objects, hash);
        assertNull(zip.getNextEntry(), "Additional zip entries");
    }
    // Check the reported metrics: 2 AIUs, 3 digital objects, sizes as stubbed above
    SipMetrics metrics = sipAssembler.getMetrics();
    assertEquals(2, metrics.numAius(), SipMetrics.NUM_AIUS);
    assertEquals(3, metrics.numDigitalObjects(), SipMetrics.NUM_DIGITAL_OBJECTS);
    // The assembly time is compared with a tolerance of DELTA_MS
    assertEquals(time, metrics.assemblyTime(), DELTA_MS, SipMetrics.ASSEMBLY_TIME);
    assertEquals(3 * digitalObjectSize, metrics.digitalObjectsSize(), SipMetrics.SIZE_DIGITAL_OBJECTS);
    assertEquals(pdiSize, metrics.pdiSize(), SipMetrics.SIZE_PDI);
    // NOTE(review): the total SIP size assertion below is disabled; the reason is not visible
    // here -- re-enable or delete once clarified.
    // long packagingInformationSize =
    // getPackagingInformationSize(packagingInformationPrototype, 2, Optional.of(hash));
    // assertEquals(pdiSize + 3 * digitalObjectSize + packagingInformationSize, metrics.sipSize(),
    // SipMetrics.SIZE_SIP);
    assertEquals(buffer.length(), metrics.sipFileSize(), SipMetrics.SIZE_SIP_FILE);
}
Also used : EncodedHash(com.opentext.ia.sdk.support.io.EncodedHash) NoHashAssembler(com.opentext.ia.sdk.support.io.NoHashAssembler) HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler) HashMap(java.util.HashMap) MemoryBuffer(com.opentext.ia.sdk.support.io.MemoryBuffer) ZipInputStream(java.util.zip.ZipInputStream) Collection(java.util.Collection) DataBuffer(com.opentext.ia.sdk.support.io.DataBuffer) Test(org.junit.jupiter.api.Test)

Example 4 with HashAssembler

use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.

the class WhenAssemblingSips method shouldMeasurePdiSizeBeforeEnd.

/**
 * Checks that the PDI size metric is already available after adding an object, i.e. without
 * {@code end()} having been called on the SIP assembler.
 */
@Test
void shouldMeasurePdiSizeBeforeEnd() throws IOException {
    long expectedPdiSize = randomInt(13, 313);
    // A buffer that always reports the expected size, whatever is written to it
    DataBuffer fixedLengthPdiBuffer = new MemoryBuffer() {

        @Override
        public long length() {
            return expectedPdiSize;
        }
    };
    HashAssembler hashing = new NoHashAssembler();
    Assembler<HashedContents<Object>> pdiAssembler = mock(Assembler.class);
    SipAssembler<Object> sipAssembler = new SipAssembler<>(
            new DefaultPackagingInformationFactory(somePackagingInformation()),
            pdiAssembler,
            hashing,
            () -> fixedLengthPdiBuffer,
            // No digital objects are extracted from any domain object
            ContentAssembler.noDedup(ignored -> Collections.emptyIterator(), hashing));

    sipAssembler.start(new MemoryBuffer());
    sipAssembler.add(new Object());

    // Deliberately no end() call before reading the metrics
    SipMetrics metricsBeforeEnd = sipAssembler.getMetrics();
    assertEquals(expectedPdiSize, metricsBeforeEnd.pdiSize(), SipMetrics.SIZE_PDI);
}
Also used : ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Arrays(java.util.Arrays) ByteArrayInputOutputStream(com.opentext.ia.sdk.support.io.ByteArrayInputOutputStream) ZipInputStream(java.util.zip.ZipInputStream) ArgumentMatchers.eq(org.mockito.ArgumentMatchers.eq) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) HashMap(java.util.HashMap) EncodedHash(com.opentext.ia.sdk.support.io.EncodedHash) ByteArrayInputStream(java.io.ByteArrayInputStream) Map(java.util.Map) Encoding(com.opentext.ia.sdk.support.io.Encoding) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) ZipEntry(java.util.zip.ZipEntry) Iterator(java.util.Iterator) DataBuffer(com.opentext.ia.sdk.support.io.DataBuffer) Collection(java.util.Collection) IOException(java.io.IOException) Mockito.when(org.mockito.Mockito.when) XmlUtil(com.opentext.ia.sdk.support.xml.XmlUtil) Mockito.verify(org.mockito.Mockito.verify) Test(org.junit.jupiter.api.Test) IOUtils(org.apache.commons.io.IOUtils) HashFunction(com.opentext.ia.sdk.support.io.HashFunction) List(java.util.List) Element(org.w3c.dom.Element) NoHashAssembler(com.opentext.ia.sdk.support.io.NoHashAssembler) MemoryBuffer(com.opentext.ia.sdk.support.io.MemoryBuffer) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Collections(java.util.Collections) HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler) Mockito.mock(org.mockito.Mockito.mock) NoHashAssembler(com.opentext.ia.sdk.support.io.NoHashAssembler) HashAssembler(com.opentext.ia.sdk.support.io.HashAssembler) NoHashAssembler(com.opentext.ia.sdk.support.io.NoHashAssembler) MemoryBuffer(com.opentext.ia.sdk.support.io.MemoryBuffer) DataBuffer(com.opentext.ia.sdk.support.io.DataBuffer) Test(org.junit.jupiter.api.Test)

Aggregations

HashAssembler (com.opentext.ia.sdk.support.io.HashAssembler)4 DataBuffer (com.opentext.ia.sdk.support.io.DataBuffer)2 EncodedHash (com.opentext.ia.sdk.support.io.EncodedHash)2 Encoding (com.opentext.ia.sdk.support.io.Encoding)2 HashFunction (com.opentext.ia.sdk.support.io.HashFunction)2 MemoryBuffer (com.opentext.ia.sdk.support.io.MemoryBuffer)2 NoHashAssembler (com.opentext.ia.sdk.support.io.NoHashAssembler)2 IOException (java.io.IOException)2 Collection (java.util.Collection)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ZipInputStream (java.util.zip.ZipInputStream)2 Test (org.junit.jupiter.api.Test)2 ContentInfo (com.opentext.ia.sdk.sip.ContentInfo)1 DigitalObject (com.opentext.ia.sdk.sip.DigitalObject)1 DigitalObjectsExtraction (com.opentext.ia.sdk.sip.DigitalObjectsExtraction)1 PackagingInformation (com.opentext.ia.sdk.sip.PackagingInformation)1 PdiAssembler (com.opentext.ia.sdk.sip.PdiAssembler)1 SipAssembler (com.opentext.ia.sdk.sip.SipAssembler)1