Search in sources :

Example 1 with TikaContentHandler

Use of de.catma.document.source.contenthandler.TikaContentHandler in the project CATMA by forTEXT.

The method addUploadFile of the class ProjectView.

/**
 * Turns one uploaded file into a {@code SourceDocument}, inserts it into the
 * current project and optionally creates accompanying collections.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>Build a {@code SourceDocumentInfo} from the upload's index, content and tech info sets.</li>
 *   <li>Pick an {@code XML2ContentHandler} when the mime type equals
 *       {@code FileType.XML2}'s mime type, otherwise a {@code TikaContentHandler}.</li>
 *   <li>Load the content, then record the detected {@code FileOSType} and a CRC32 checksum
 *       of the content in the tech info set.</li>
 *   <li>Insert the document into the project.</li>
 *   <li>Import the upload's intrinsic markup collection if it has one.</li>
 *   <li>Create a user markup collection when a non-empty name pattern is supplied.</li>
 * </ol>
 *
 * <p>An {@link IOException} raised inside the processing steps is logged and reported to the
 * user via a notification; it is not rethrown, so the caller continues with the next file.
 *
 * @param uploadFile               the uploaded file to add
 * @param useApostropheAsSeparator passed through to {@code uploadFile.getIndexInfoSet(boolean)}
 * @param collectionNamePattern    name pattern for a collection to create alongside the document;
 *                                 every occurrence of the placeholder <code>{{Title}}</code> is replaced
 *                                 with the upload's title. {@code null} or empty means no collection
 *                                 is created.
 */
private void addUploadFile(UploadFile uploadFile, boolean useApostropheAsSeparator, String collectionNamePattern) {
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(
            uploadFile.getIndexInfoSet(useApostropheAsSeparator),
            uploadFile.getContentInfoSet(),
            uploadFile.getTechInfoSet());

    // XML2 uploads get the dedicated handler; every other mime type goes through Tika.
    SourceContentHandler contentHandler =
            sourceDocumentInfo.getTechInfoSet().getMimeType().equals(FileType.XML2.getMimeType())
                    ? new XML2ContentHandler()
                    : new TikaContentHandler();
    contentHandler.setSourceDocumentInfo(sourceDocumentInfo);

    SourceDocument document = new SourceDocument(uploadFile.getUuid(), contentHandler);
    try {
        String content = document.getContent();

        FileOSType fileOSType = FileOSType.getFileOSType(content);
        sourceDocumentInfo.getTechInfoSet().setFileOSType(fileOSType);

        // Encode explicitly as UTF-8: the no-arg getBytes() uses the JVM's default charset,
        // which would make the checksum of identical text differ between platforms.
        // NOTE(review): confirm that any code comparing against this checksum encodes the same way.
        CRC32 checksum = new CRC32();
        checksum.update(content.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        sourceDocumentInfo.getTechInfoSet().setChecksum(checksum.getValue());

        project.insert(document);

        AnnotationCollection intrinsicMarkupCollection = uploadFile.getIntrinsicMarkupCollection();
        if (intrinsicMarkupCollection != null) {
            project.importCollection(Collections.emptyList(), intrinsicMarkupCollection);
        }

        if (collectionNamePattern != null && !collectionNamePattern.isEmpty()) {
            String collectionName = collectionNamePattern.replace("{{Title}}", uploadFile.getTitle());
            project.createUserMarkupCollection(collectionName, document);
        }
    } catch (IOException e) {
        // %s formats null as "null", so no explicit toString() is needed here; calling it
        // could itself throw while we are already reporting an error.
        Logger.getLogger(ProjectView.class.getName()).log(
                Level.SEVERE,
                String.format("Error loading content of %1$s", uploadFile.getTempFilename()),
                e);

        // Never show "null" or pure whitespace as the underlying message.
        String errorMsg = e.getMessage();
        if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
            errorMsg = "";
        }
        Notification.show(
                "Error",
                String.format(
                        "Error loading content of %1$s! " + "This document will be skipped!\n The underlying error message was:\n%2$s",
                        uploadFile.getTitle(),
                        errorMsg),
                Type.ERROR_MESSAGE);
    }
}
Also used : AnnotationCollection(de.catma.document.annotation.AnnotationCollection) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) CRC32(java.util.zip.CRC32) TikaContentHandler(de.catma.document.source.contenthandler.TikaContentHandler) SourceDocument(de.catma.document.source.SourceDocument) XML2ContentHandler(de.catma.document.source.contenthandler.XML2ContentHandler) FileOSType(de.catma.document.source.FileOSType) IOException(java.io.IOException) SourceContentHandler(de.catma.document.source.contenthandler.SourceContentHandler)

Aggregations

AnnotationCollection (de.catma.document.annotation.AnnotationCollection): 1, FileOSType (de.catma.document.source.FileOSType): 1, SourceDocument (de.catma.document.source.SourceDocument): 1, SourceDocumentInfo (de.catma.document.source.SourceDocumentInfo): 1, SourceContentHandler (de.catma.document.source.contenthandler.SourceContentHandler): 1, TikaContentHandler (de.catma.document.source.contenthandler.TikaContentHandler): 1, XML2ContentHandler (de.catma.document.source.contenthandler.XML2ContentHandler): 1, IOException (java.io.IOException): 1, CRC32 (java.util.zip.CRC32): 1