Use of de.catma.document.source.contenthandler.TikaContentHandler in the project catma by forTEXT.
Class ProjectView, method addUploadFile.
/**
 * Adds a single uploaded file to the current project as a source document.
 *
 * <p>The document's content is loaded through a content handler chosen by MIME type
 * ({@code XML2ContentHandler} for the XML2 file type, {@code TikaContentHandler} otherwise).
 * The detected file OS type and a CRC32 checksum of the content are stored in the
 * document's tech info set before the document is inserted into the project. If the
 * upload carries an intrinsic markup collection it is imported, and if a non-empty
 * collection name pattern is given a user markup collection is created for the document.
 *
 * <p>An {@link IOException} raised while processing is logged and reported to the user
 * through a notification; it is not rethrown.
 *
 * @param uploadFile the uploaded file to add
 * @param useApostropheAsSeparator passed through to {@code uploadFile.getIndexInfoSet(...)}
 * @param collectionNamePattern name pattern for a new user markup collection; each
 *        {@code {{Title}}} occurrence is replaced by the upload's title. When
 *        {@code null} or empty, no collection is created.
 */
private void addUploadFile(UploadFile uploadFile, boolean useApostropheAsSeparator, String collectionNamePattern) {
    SourceDocumentInfo docInfo = new SourceDocumentInfo(
            uploadFile.getIndexInfoSet(useApostropheAsSeparator),
            uploadFile.getContentInfoSet(),
            uploadFile.getTechInfoSet());

    // XML2 uploads get their dedicated handler; everything else goes through Tika.
    SourceContentHandler handler;
    if (docInfo.getTechInfoSet().getMimeType().equals(FileType.XML2.getMimeType())) {
        handler = new XML2ContentHandler();
    } else {
        handler = new TikaContentHandler();
    }
    handler.setSourceDocumentInfo(docInfo);

    SourceDocument sourceDocument = new SourceDocument(uploadFile.getUuid(), handler);

    try {
        String text = sourceDocument.getContent();

        docInfo.getTechInfoSet().setFileOSType(FileOSType.getFileOSType(text));

        // NOTE(review): getBytes() encodes with the platform default charset, so the
        // checksum may differ between platforms - confirm whether that is intended.
        CRC32 crc = new CRC32();
        crc.update(text.getBytes());
        docInfo.getTechInfoSet().setChecksum(crc.getValue());

        project.insert(sourceDocument);

        AnnotationCollection intrinsicMarkup = uploadFile.getIntrinsicMarkupCollection();
        if (intrinsicMarkup != null) {
            project.importCollection(Collections.emptyList(), intrinsicMarkup);
        }

        boolean createCollection = collectionNamePattern != null && !collectionNamePattern.isEmpty();
        if (createCollection) {
            project.createUserMarkupCollection(
                    collectionNamePattern.replace("{{Title}}", uploadFile.getTitle()),
                    sourceDocument);
        }
    } catch (IOException e) {
        String logMessage = String.format(
                "Error loading content of %1$s", uploadFile.getTempFilename().toString());
        Logger.getLogger(ProjectView.class.getName()).log(Level.SEVERE, logMessage, e);

        // Never show "null" or whitespace-only text to the user.
        String rawMessage = e.getMessage();
        String detail = (rawMessage == null || rawMessage.trim().isEmpty()) ? "" : rawMessage;

        String userMessage = String.format(
                "Error loading content of %1$s! This document will be skipped!\n The underlying error message was:\n%2$s",
                uploadFile.getTitle(),
                detail);
        Notification.show("Error", userMessage, Type.ERROR_MESSAGE);
    }
}
Aggregations