Use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.
Class FileArchiver, method run.
/**
 * Packs the files found under {@code rootPath} into one SIP file at {@code sip}.
 *
 * <p>Steps, in order: make sure the SIP's parent directory exists, print a banner, describe
 * the packaging information, define how a {@link File} is rendered into the PDI XML, wire up
 * content extraction with SHA-256/Base64 content hashing, run the assembly, and finally print
 * the metrics reported by the assembler.
 *
 * @param rootPath directory whose files are handed to {@code addFilesIn}
 * @param sip path of the SIP file to write
 * @throws IOException if the canonical SIP path cannot be resolved or the assembly fails on I/O
 * @throws IllegalStateException if the SIP's parent directory neither could be created nor exists
 */
private void run(String rootPath, String sip) throws IOException {
    File sipFile = new File(sip).getCanonicalFile();
    File targetDir = sipFile.getParentFile();
    // mkdirs() returns false when the directory is already there, hence the second check
    boolean targetDirReady = targetDir.mkdirs() || targetDir.isDirectory();
    if (!targetDirReady) {
        throw new IllegalStateException(
                "Could not create all required directories in path " + targetDir.getAbsolutePath());
    }
    System.out.printf("%nSample 2: Archiving files from %s into %s%n", rootPath, sipFile.getPath());

    // Tell InfoArchive where and how to archive the data
    URI schemaUri = URI.create("urn:com.opentext.ia.sdk.sample.file:1.0");
    String entity = "file";
    PackagingInformation packagingTemplate = PackagingInformation.builder()
            .dss()
            .application("fileApplication")
            .holding("fileHolding")
            .producer("SIP SDK")
            .entity(entity)
            .schema(schemaUri.toString())
            .end()
            .build();

    // Define a mapping from our domain object to the PDI XML
    PdiAssembler<File> pdiMapper = new XmlPdiAssembler<File>(schemaUri, entity) {
        @Override
        protected void doAdd(File file, Map<String, ContentInfo> contentInfo) {
            try {
                String relative = relativePath(file, rootPath);
                // One element per file attribute, followed by one <hash> per content hash.
                // The hashes are looked up under the same relative path used as content id below.
                getBuilder()
                        .element("path", relative)
                        .element("size", Long.toString(file.length()))
                        .element("permissions", permissionsOf(file))
                        .element("contentType", Files.probeContentType(file.toPath()))
                        .elements("hashes", "hash", contentInfo.get(relative).getContentHashes(),
                                (contentHash, hashElement) -> {
                                    hashElement
                                            .attribute("algorithm", contentHash.getHashFunction())
                                            .attribute("encoding", contentHash.getEncoding())
                                            .attribute("value", contentHash.getValue());
                                });
            } catch (IOException e) {
                // doAdd cannot throw checked exceptions, so rethrow unchecked
                throw new RuntimeIoException(e);
            }
        }
    };

    // Every file contributes exactly one digital object, identified by its relative path
    DigitalObjectsExtraction<File> contentExtractor = file ->
            Collections.singleton(DigitalObject.fromFile(relativePath(file, rootPath), file)).iterator();
    HashAssembler sha256Hasher = new SingleHashAssembler(HashFunction.SHA256, Encoding.BASE64);

    // Assemble the SIP
    SipAssembler<File> sipAssembler = SipAssembler.forPdiAndContentWithContentHashing(
            packagingTemplate, pdiMapper, contentExtractor, sha256Hasher);
    sipAssembler.start(new FileBuffer(sipFile));
    try {
        addFilesIn(new File(rootPath), rootPath, relativePath(sipFile, rootPath), sipAssembler);
    } finally {
        // always finish the assembly, even when adding files failed
        sipAssembler.end();
    }

    // Show metrics about the assembly process
    SipMetrics stats = sipAssembler.getMetrics();
    System.out.printf(" Added %d files to SIP of %d bytes in %d ms%n",
            stats.numDigitalObjects(), stats.sipFileSize(), stats.assemblyTime());
}
Use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.
Class ContentAssemblerDefault, method contentHashFor.
/**
 * Produces the content hashes for the data readable from {@code stream}.
 *
 * <p>The configured content hash assembler is initialized, the stream is copied to
 * {@code NullOutputStream.NULL_OUTPUT_STREAM} with the assembler passed along to the copy, and
 * the assembler's result is returned. The whole sequence runs while holding the assembler's
 * monitor.
 *
 * @param stream source of the content to hash
 * @return the hashes reported by the content hash assembler after the copy
 * @throws IOException if copying the stream fails
 */
protected Collection<EncodedHash> contentHashFor(InputStream stream) throws IOException {
    HashAssembler assembler = getContentHashAssembler();
    // initialize/copy/get must not interleave with another use of the same assembler
    synchronized (assembler) {
        assembler.initialize();
        IOStreams.copy(stream, NullOutputStream.NULL_OUTPUT_STREAM, BUFFER_SIZE, assembler);
        Collection<EncodedHash> hashes = assembler.get();
        return hashes;
    }
}
Use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.
Class WhenAssemblingSips, method shouldZipContentsAndReportMetrics.
/**
 * Assembles a SIP from two domain objects (three digital objects in total) using mocked PDI and
 * hash assemblers, then checks the calls made to the PDI assembler, the order of the ZIP
 * entries, and the reported metrics.
 */
@Test
void shouldZipContentsAndReportMetrics() throws IOException {
    // PDI side: a mocked assembler and a hash assembler reporting one fixed hash and a fixed size
    Assembler<HashedContents<Object>> pdiAssembler = mock(Assembler.class);
    HashAssembler pdiHasher = mock(HashAssembler.class);
    EncodedHash pdiHash = someHash();
    when(pdiHasher.get()).thenReturn(Collections.singletonList(pdiHash));
    long expectedPdiSize = randomInt(7, 128);
    when(pdiHasher.numBytesHashed()).thenReturn(expectedPdiSize);

    // Domain objects: the first yields two digital objects, the second yields one
    Object first = "object1_" + randomString(8);
    Object second = "object2_" + randomString(8);
    Collection<Object> domainObjects = Arrays.asList(first, second);
    DigitalObjectsExtraction<Object> extraction = mock(DigitalObjectsExtraction.class);
    String firstIdA = randomString(8);
    String firstIdB = randomString(8);
    String secondId = randomString(8);
    List<? extends DigitalObject> contentOfFirst =
            Arrays.asList(someContentDataObject(firstIdA), someContentDataObject(firstIdB));
    List<? extends DigitalObject> contentOfSecond =
            Collections.singletonList(someContentDataObject(secondId));
    // thenAnswer hands out a fresh iterator on every call
    when(extraction.apply(first)).thenAnswer(invocation -> contentOfFirst.iterator());
    when(extraction.apply(second)).thenAnswer(invocation -> contentOfSecond.iterator());

    // Content side: the hash assembler returns the next prepared hash collection on each get()
    HashAssembler contentHasher = mock(HashAssembler.class);
    Collection<EncodedHash> hashesFirstA = Collections.singletonList(someHash());
    Collection<EncodedHash> hashesFirstB = Collections.singletonList(someHash());
    Collection<EncodedHash> hashesSecond = Collections.singletonList(someHash());
    Iterator<Collection<EncodedHash>> preparedHashes =
            Arrays.asList(hashesFirstA, hashesFirstB, hashesSecond).iterator();
    when(contentHasher.get()).thenAnswer(invocation -> preparedHashes.next());
    long contentSize = randomInt(5, 255);
    when(contentHasher.numBytesHashed()).thenReturn(contentSize);

    // Content info the PDI assembler is expected to receive for each domain object
    Map<String, ContentInfo> expectedInfoFirst = new HashMap<>();
    expectedInfoFirst.put(firstIdA, new ContentInfo(firstIdA, hashesFirstA));
    expectedInfoFirst.put(firstIdB, new ContentInfo(firstIdB, hashesFirstB));
    Map<String, ContentInfo> expectedInfoSecond =
            Collections.singletonMap(secondId, new ContentInfo(secondId, hashesSecond));

    PackagingInformation packagingPrototype = somePackagingInformation();
    SipAssembler<Object> sipAssembler = SipAssembler.forPdiAndContentWithHashing(
            packagingPrototype, pdiAssembler, pdiHasher, extraction, contentHasher);
    DataBuffer sipBuffer = new MemoryBuffer();

    // Run the assembly and measure how long it took
    long startedAt = System.currentTimeMillis();
    sipAssembler.start(sipBuffer);
    for (Object domainObject : domainObjects) {
        sipAssembler.add(domainObject);
    }
    sipAssembler.end();
    long elapsed = System.currentTimeMillis() - startedAt;

    // The PDI assembler saw each domain object together with its content info
    verify(pdiAssembler).start(any(DataBuffer.class));
    verify(pdiAssembler).add(eq(new HashedContents<>(first, expectedInfoFirst)));
    verify(pdiAssembler).add(eq(new HashedContents<>(second, expectedInfoSecond)));
    verify(pdiAssembler).end();

    // ZIP layout: the three content entries, then the PDI, then the packaging information
    try (ZipInputStream zipped = new ZipInputStream(sipBuffer.openForReading())) {
        assertContentDataObject(zipped, firstIdA);
        assertContentDataObject(zipped, firstIdB);
        assertContentDataObject(zipped, secondId);
        assertPreservationDescriptionInformation(zipped);
        assertPackagingInformation(zipped, domainObjects, pdiHash);
        assertNull(zipped.getNextEntry(), "Additional zip entries");
    }

    SipMetrics reported = sipAssembler.getMetrics();
    assertEquals(2, reported.numAius(), SipMetrics.NUM_AIUS);
    assertEquals(3, reported.numDigitalObjects(), SipMetrics.NUM_DIGITAL_OBJECTS);
    assertEquals(elapsed, reported.assemblyTime(), DELTA_MS, SipMetrics.ASSEMBLY_TIME);
    assertEquals(3 * contentSize, reported.digitalObjectsSize(), SipMetrics.SIZE_DIGITAL_OBJECTS);
    assertEquals(expectedPdiSize, reported.pdiSize(), SipMetrics.SIZE_PDI);
    // NOTE(review): the overall SIP size check was already disabled here; it is kept disabled.
    // long packagingInformationSize =
    // getPackagingInformationSize(packagingPrototype, 2, Optional.of(pdiHash));
    // assertEquals(expectedPdiSize + 3 * contentSize + packagingInformationSize, reported.sipSize(),
    // SipMetrics.SIZE_SIP);
    assertEquals(sipBuffer.length(), reported.sipFileSize(), SipMetrics.SIZE_SIP_FILE);
}
Use of com.opentext.ia.sdk.support.io.HashAssembler in project infoarchive-sip-sdk by Enterprise-Content-Management.
Class WhenAssemblingSips, method shouldMeasurePdiSizeBeforeEnd.
/**
 * Checks that the PDI size metric is already available after adding an object, without the
 * assembly having been ended: the PDI buffer reports a fixed length and the metrics must
 * reflect that value.
 */
@Test
void shouldMeasurePdiSizeBeforeEnd() throws IOException {
    long reportedPdiSize = randomInt(13, 313);
    // A buffer whose length is pinned to the random size chosen above
    DataBuffer fixedLengthBuffer = new MemoryBuffer() {
        @Override
        public long length() {
            return reportedPdiSize;
        }
    };
    HashAssembler noHashing = new NoHashAssembler();
    Assembler<HashedContents<Object>> pdiAssembler = mock(Assembler.class);
    DefaultPackagingInformationFactory packagingFactory =
            new DefaultPackagingInformationFactory(somePackagingInformation());
    SipAssembler<Object> sipAssembler = new SipAssembler<>(
            packagingFactory,
            pdiAssembler,
            noHashing,
            () -> fixedLengthBuffer,
            ContentAssembler.noDedup(domainObject -> Collections.emptyIterator(), noHashing));

    // Deliberately no end(): the metric is read while the assembly is still open
    sipAssembler.start(new MemoryBuffer());
    sipAssembler.add(new Object());

    SipMetrics reported = sipAssembler.getMetrics();
    assertEquals(reportedPdiSize, reported.pdiSize(), SipMetrics.SIZE_PDI);
}
Aggregations