Search in sources :

Example 1 with AnnotationCollection

use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.

The method export of the class CorpusExporter.

/**
 * Writes the given corpus to {@code os} as a gzip-compressed tar archive.
 *
 * <p>For every source document of the corpus one entry with the document's
 * content is written, followed by one entry per annotation collection that
 * {@code corpus.getUserMarkupCollectionRefs(sd)} returns for that document.
 * Collections are serialized with a {@link TeiUserMarkupCollectionSerializationHandler}.
 *
 * <p>The archive stream and the gzip stream are closed when this method
 * returns or throws, which also closes {@code os}.
 *
 * @param exportName name passed on to the entry-name helpers
 * @param corpus the corpus whose documents and collections are exported
 * @param os the stream receiving the compressed archive; closed by this method
 * @throws IOException if loading, serializing or writing fails
 */
public void export(String exportName, Corpus corpus, OutputStream os) throws IOException {
    // try-with-resources guarantees both streams are closed even when an entry fails
    // half-way. A finish() inside a finally block would throw on the still-open entry,
    // hide the original exception and skip close().
    try (OutputStream tarFileOs = new GZIPOutputStream(os);
            TarArchiveOutputStream taOut = new TarArchiveOutputStream(tarFileOs, "UTF-8")) {
        taOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        taOut.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);
        Charset contentCharset = Charset.forName("UTF8");
        for (SourceDocument sd : corpus.getSourceDocuments()) {
            String sdEntryName = getSourceDocEntryName(exportName, sd);
            writeTarEntry(taOut, sdEntryName, sd.getContent().getBytes(contentCharset));
            for (AnnotationCollectionReference umcRef : corpus.getUserMarkupCollectionRefs(sd)) {
                // Load the collection once and reuse it for serialization and entry naming.
                AnnotationCollection umc = repo.getUserMarkupCollection(umcRef);
                TeiUserMarkupCollectionSerializationHandler handler = new TeiUserMarkupCollectionSerializationHandler(repo.getTagManager(), false);
                ByteArrayOutputStream teiDocOut = new ByteArrayOutputStream();
                handler.serialize(umc, sd, teiDocOut);
                byte[] umcContent = teiDocOut.toByteArray();
                writeTarEntry(taOut, getUmcEntryName(exportName, umc, sd), umcContent);
            }
        }
        // Only finish the archive on the success path; close() takes care of the rest.
        taOut.finish();
    }
}

/**
 * Writes one complete archive entry with the given name and content.
 */
private static void writeTarEntry(TarArchiveOutputStream taOut, String entryName, byte[] content) throws IOException {
    TarArchiveEntry entry = new TarArchiveEntry(entryName);
    // The size has to be set before the entry is put into the archive.
    entry.setSize(content.length);
    taOut.putArchiveEntry(entry);
    taOut.write(content);
    taOut.closeArchiveEntry();
}
Also used : AnnotationCollection(de.catma.document.annotation.AnnotationCollection) GZIPOutputStream(java.util.zip.GZIPOutputStream) OutputStream(java.io.OutputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) SourceDocument(de.catma.document.source.SourceDocument) AnnotationCollectionReference(de.catma.document.annotation.AnnotationCollectionReference) TeiUserMarkupCollectionSerializationHandler(de.catma.serialization.tei.TeiUserMarkupCollectionSerializationHandler) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Example 2 with AnnotationCollection

use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.

The method serializeProjectResources of the class ProjectResourceExportApiRequestHandler.

/**
 * Serializes all source documents of the project together with the tag
 * definitions and annotations of their annotation collections.
 *
 * <p>A tag reference whose conversion raises an {@link IOException} is logged
 * and contributes a {@code null} element to the annotation list. Any other
 * failure is logged and answered with a fixed JSON error object.
 *
 * @return the serialized export, or the JSON error object on failure
 */
private String serializeProjectResources() {
    try {
        Export projectExport = new Export();
        for (SourceDocument doc : project.getSourceDocuments()) {
            // Load every annotation collection of the document first.
            ArrayList<AnnotationCollection> collections = new ArrayList<>();
            for (AnnotationCollectionReference collectionRef : doc.getUserMarkupCollectionRefs()) {
                AnnotationCollection loaded = project.getUserMarkupCollection(collectionRef);
                collections.add(loaded);
            }

            // Gather tag definitions and tag references across all loaded collections.
            ArrayList<TagDefinition> tags = new ArrayList<>();
            ArrayList<TagReference> references = new ArrayList<>();
            for (AnnotationCollection collection : collections) {
                for (TagsetDefinition tagset : collection.getTagLibrary().getTagsetDefinitions()) {
                    tagset.stream().forEach(tags::add);
                }
                references.addAll(collection.getTagReferences());
            }

            String documentUrl = String.format("%s%s/doc/%s", BASE_URL, handlerPath.substring(1), doc.getUuid().toLowerCase());
            PreApiSourceDocument apiDocument = new PreApiSourceDocument(doc, documentUrl);
            ExportDocument exportDocument = new ExportDocument(
                apiDocument,
                tags.stream().map(PreApiTagDefinition::new).collect(Collectors.toList()),
                references.stream().map((TagReference reference) -> {
                    try {
                        // The first definition with a matching UUID wins; a missing one
                        // raises NoSuchElementException, handled by the outer catch.
                        TagDefinition definition = tags.stream()
                            .filter(candidate -> candidate.getUuid().equals(reference.getTagDefinitionId()))
                            .findFirst()
                            .get();
                        return new PreApiAnnotation(reference, definition, doc);
                    } catch (IOException e) {
                        logger.log(Level.WARNING, String.format("Error serializing TagReference: %s", reference), e);
                        return null;
                    }
                }).collect(Collectors.toList()));
            projectExport.addExportDocument(exportDocument);
        }
        return new SerializationHelper<Export>().serialize(projectExport);
    } catch (Exception e) {
        logger.log(Level.SEVERE, "Failed to serialize project resources", e);
        return "{\"error\": \"Failed to serialize project resources, please contact CATMA support\"}";
    }
}
Also used : RequestHandler(com.vaadin.server.RequestHandler) ExportDocument(de.catma.api.pre.serialization.models.ExportDocument) VaadinRequest(com.vaadin.server.VaadinRequest) PreApiAnnotation(de.catma.api.pre.serialization.model_wrappers.PreApiAnnotation) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Export(de.catma.api.pre.serialization.models.Export) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) TagsetDefinition(de.catma.tag.TagsetDefinition) IDGenerator(de.catma.util.IDGenerator) NoSuchElementException(java.util.NoSuchElementException) OutputStream(java.io.OutputStream) PreApiSourceDocument(de.catma.api.pre.serialization.model_wrappers.PreApiSourceDocument) CATMAPropertyKey(de.catma.properties.CATMAPropertyKey) VaadinResponse(com.vaadin.server.VaadinResponse) AnnotationCollectionReference(de.catma.document.annotation.AnnotationCollectionReference) Project(de.catma.project.Project) IOException(java.io.IOException) SourceDocument(de.catma.document.source.SourceDocument) PreApiTagDefinition(de.catma.api.pre.serialization.model_wrappers.PreApiTagDefinition) AnnotationCollection(de.catma.document.annotation.AnnotationCollection) SerializationHelper(de.catma.repository.git.serialization.SerializationHelper) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) TagReference(de.catma.document.annotation.TagReference) VaadinSession(com.vaadin.server.VaadinSession) TagDefinition(de.catma.tag.TagDefinition) PreApiTagDefinition(de.catma.api.pre.serialization.model_wrappers.PreApiTagDefinition) TagDefinition(de.catma.tag.TagDefinition) AnnotationCollection(de.catma.document.annotation.AnnotationCollection) PreApiSourceDocument(de.catma.api.pre.serialization.model_wrappers.PreApiSourceDocument) PreApiSourceDocument(de.catma.api.pre.serialization.model_wrappers.PreApiSourceDocument) SourceDocument(de.catma.document.source.SourceDocument) 
ArrayList(java.util.ArrayList) AnnotationCollectionReference(de.catma.document.annotation.AnnotationCollectionReference) IOException(java.io.IOException) ExportDocument(de.catma.api.pre.serialization.models.ExportDocument) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) PreApiTagDefinition(de.catma.api.pre.serialization.model_wrappers.PreApiTagDefinition) TagsetDefinition(de.catma.tag.TagsetDefinition) PreApiAnnotation(de.catma.api.pre.serialization.model_wrappers.PreApiAnnotation) Export(de.catma.api.pre.serialization.models.Export) TagReference(de.catma.document.annotation.TagReference)

Example 3 with AnnotationCollection

use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.

The method addUploadFile of the class ProjectView.

/**
 * Turns an uploaded file into a source document and adds it to the project.
 *
 * <p>After inserting the document, the file's intrinsic markup collection is
 * imported if it has one, and an annotation collection is created when a
 * non-empty name pattern is given. An {@link IOException} is logged and
 * reported to the user through a notification.
 *
 * @param uploadFile the uploaded file to add
 * @param useApostropheAsSeparator passed on to {@code uploadFile.getIndexInfoSet}
 * @param collectionNamePattern name pattern for the new collection, in which
 *        <code>{{Title}}</code> is replaced by the file's title; may be
 *        {@code null} or empty to skip creating a collection
 */
private void addUploadFile(UploadFile uploadFile, boolean useApostropheAsSeparator, String collectionNamePattern) {
    SourceDocumentInfo info = new SourceDocumentInfo(
        uploadFile.getIndexInfoSet(useApostropheAsSeparator),
        uploadFile.getContentInfoSet(),
        uploadFile.getTechInfoSet());

    // XML2 files get their dedicated handler, everything else goes through Tika.
    SourceContentHandler handler;
    if (info.getTechInfoSet().getMimeType().equals(FileType.XML2.getMimeType())) {
        handler = new XML2ContentHandler();
    } else {
        handler = new TikaContentHandler();
    }
    handler.setSourceDocumentInfo(info);

    SourceDocument sourceDocument = new SourceDocument(uploadFile.getUuid(), handler);
    try {
        String text = sourceDocument.getContent();
        info.getTechInfoSet().setFileOSType(FileOSType.getFileOSType(text));

        // NOTE(review): getBytes() without a charset depends on the platform default
        // encoding on older Java versions - confirm the checksum is meant to vary with it.
        CRC32 crc = new CRC32();
        crc.update(text.getBytes());
        info.getTechInfoSet().setChecksum(crc.getValue());

        project.insert(sourceDocument);

        AnnotationCollection intrinsicCollection = uploadFile.getIntrinsicMarkupCollection();
        if (intrinsicCollection != null) {
            project.importCollection(Collections.emptyList(), intrinsicCollection);
        }

        boolean patternMissing = (collectionNamePattern == null) || collectionNamePattern.isEmpty();
        if (!patternMissing) {
            project.createUserMarkupCollection(
                collectionNamePattern.replace("{{Title}}", uploadFile.getTitle()),
                sourceDocument);
        }
    } catch (IOException e) {
        String logMessage = String.format("Error loading content of %1$s", uploadFile.getTempFilename().toString());
        Logger.getLogger(ProjectView.class.getName()).log(Level.SEVERE, logMessage, e);

        // Show an empty string rather than "null" or whitespace in the notification.
        String rawMessage = e.getMessage();
        String shownMessage = ((rawMessage == null) || rawMessage.trim().isEmpty()) ? "" : rawMessage;

        String notificationText = String.format(
            "Error loading content of %1$s! " + "This document will be skipped!\n The underlying error message was:\n%2$s",
            uploadFile.getTitle(),
            shownMessage);
        Notification.show("Error", notificationText, Type.ERROR_MESSAGE);
    }
}
Also used : AnnotationCollection(de.catma.document.annotation.AnnotationCollection) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) CRC32(java.util.zip.CRC32) TikaContentHandler(de.catma.document.source.contenthandler.TikaContentHandler) SourceDocument(de.catma.document.source.SourceDocument) XML2ContentHandler(de.catma.document.source.contenthandler.XML2ContentHandler) FileOSType(de.catma.document.source.FileOSType) IOException(java.io.IOException) SourceContentHandler(de.catma.document.source.contenthandler.SourceContentHandler)

Example 4 with AnnotationCollection

use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.

The method enter of the class ImportIntrinsicMarkupStep.

/**
 * Inspects the intrinsic markup of all uploaded XML2 files when the step is entered.
 *
 * <p>Nothing happens when the step is entered backwards. Otherwise the content
 * panel is disabled, an indeterminate progress bar is shown and a task is
 * submitted to the {@link BackgroundServiceProvider}. The task deserializes the
 * intrinsic markup of each XML2 upload file into an {@link AnnotationCollection}.
 * On completion the file list and the tagset import list are rebuilt; on
 * failure the error is logged and shown to the user. In both cases the panel
 * is re-enabled and the progress bar hidden.
 *
 * @param back {@code true} if the step is entered by navigating backwards
 */
@Override
public void enter(boolean back) {
    if (back) {
        return;
    }
    contentPanel.setEnabled(false);
    progressBar.setVisible(true);
    progressBar.setIndeterminate(true);
    // Only XML2 files can carry intrinsic markup. The constant is the receiver of
    // equals() so that an upload file without a MIME type is skipped instead of
    // raising a NullPointerException.
    @SuppressWarnings("unchecked") final ArrayList<UploadFile> files = new ArrayList<UploadFile>(((Collection<UploadFile>) wizardContext.get(DocumentWizard.WizardContextKey.UPLOAD_FILE_LIST)).stream().filter(uploadFile -> FileType.XML2.getMimeType().equals(uploadFile.getMimetype())).collect(Collectors.toList()));
    // A fresh tag manager collects the tagsets found while deserializing the files.
    final TagManager tagmanager = new TagManager(new TagLibrary());
    BackgroundServiceProvider backgroundServiceProvider = (BackgroundServiceProvider) UI.getCurrent();
    backgroundServiceProvider.submit("inspecting-intrinsic-markup", new DefaultProgressCallable<List<UploadFile>>() {

        @Override
        public List<UploadFile> call() throws Exception {
            IDGenerator idGenerator = new IDGenerator();
            for (UploadFile uploadFile : files) {
                XML2ContentHandler contentHandler = new XML2ContentHandler();
                SourceDocument doc = new SourceDocument(uploadFile.getUuid(), contentHandler);
                SourceDocumentInfo documentInfo = new SourceDocumentInfo();
                TechInfoSet techInfoSet = new TechInfoSet();
                techInfoSet.setURI(uploadFile.getTempFilename());
                documentInfo.setTechInfoSet(techInfoSet);
                contentHandler.setSourceDocumentInfo(documentInfo);
                XmlMarkupCollectionSerializationHandler handler = new XmlMarkupCollectionSerializationHandler(tagmanager, contentHandler, project.getUser().getIdentifier());
                try (FileInputStream fis = new FileInputStream(new File(uploadFile.getTempFilename()))) {
                    AnnotationCollection collection = handler.deserialize(doc, idGenerator.generateCollectionId(), fis);
                    uploadFile.setIntrinsicMarkupCollection(collection);
                }
            }
            return files;
        }
    }, new ExecutionListener<List<UploadFile>>() {

        @Override
        public void done(List<UploadFile> result) {
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);
            fileList.clear();
            fileList.addAll(result);
            fileDataProvider.refreshAll();
            tagsetImportList.clear();
            String defaultIntrinsicXmlElementsName = "Default Intrinsic XML Elements";
            for (TagsetDefinition tagset : tagmanager.getTagLibrary()) {
                if (!tagset.isEmpty()) {
                    // A tagset already known to the project is merged, otherwise it is created.
                    TagsetDefinition targetTagset = project.getTagManager().getTagLibrary().getTagsetDefinition(tagset.getUuid());
                    boolean inProject = false;
                    if (targetTagset == null) {
                        targetTagset = tagset;
                    } else {
                        inProject = true;
                    }
                    // The namespace has to be derived before a default name is assigned below.
                    String namespace = tagset.getName() == null ? "none" : tagset.getName();
                    if (tagset.getName() == null) {
                        tagset.setName(defaultIntrinsicXmlElementsName);
                    }
                    TagsetImport tagsetImport = new TagsetImport(namespace, tagset, targetTagset, inProject ? TagsetImportState.WILL_BE_MERGED : TagsetImportState.WILL_BE_CREATED);
                    tagsetImportList.add(tagsetImport);
                }
            }
            tagsetDataProvider.refreshAll();
            wizardContext.put(DocumentWizard.WizardContextKey.TAGSET_IMPORT_LIST, tagsetImportList);
            if (stepChangeListener != null) {
                stepChangeListener.stepChanged(ImportIntrinsicMarkupStep.this);
            }
        }

        @Override
        public void error(Throwable t) {
            // Restore the UI as done() does; otherwise the panel stays disabled and the
            // indeterminate progress bar keeps running after a failed inspection.
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);
            Logger.getLogger(ImportIntrinsicMarkupStep.class.getName()).log(Level.SEVERE, "Error inspecting files", t);
            String errorMsg = t.getMessage();
            if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                errorMsg = "";
            }
            Notification.show("Error", String.format("Error inspecting the contents! " + "\n The underlying error message was:\n%1$s", errorMsg), Type.ERROR_MESSAGE);
        }
    });
}
Also used : SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) ArrayList(java.util.ArrayList) BackgroundServiceProvider(de.catma.backgroundservice.BackgroundServiceProvider) ArrayList(java.util.ArrayList) List(java.util.List) TechInfoSet(de.catma.document.source.TechInfoSet) TagLibrary(de.catma.tag.TagLibrary) AnnotationCollection(de.catma.document.annotation.AnnotationCollection) SourceDocument(de.catma.document.source.SourceDocument) XML2ContentHandler(de.catma.document.source.contenthandler.XML2ContentHandler) FileInputStream(java.io.FileInputStream) TagsetDefinition(de.catma.tag.TagsetDefinition) TagManager(de.catma.tag.TagManager) XmlMarkupCollectionSerializationHandler(de.catma.serialization.intrinsic.xml.XmlMarkupCollectionSerializationHandler) IDGenerator(de.catma.util.IDGenerator) File(java.io.File)

Example 5 with AnnotationCollection

use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.

The method loadAnnotationCollection of the class GraphWorktreeProject.

/**
 * Reads an annotation collection from a TEI stream and reports, for each tagset
 * it contains, how that tagset relates to this project.
 *
 * <p>A tagset counts as "in project history" when its UUID is among the resource
 * ids returned by the git project handler, and as "current" when it is in the
 * history and the project's tag library still has a tagset with that UUID.
 * The returned collection receives a newly generated id and is bound to the
 * UUID and revision hash of {@code document}.
 *
 * @param inputStream the TEI data to read; not closed by this method
 * @param document the source document the collection belongs to
 * @return the loaded collection paired with the import status of its tagsets
 * @throws IOException declared for failures while reading the stream
 */
public Pair<AnnotationCollection, List<TagsetDefinitionImportStatus>> loadAnnotationCollection(InputStream inputStream, SourceDocument document) throws IOException {
    // A standalone tag manager keeps the imported tagsets apart from the project's own.
    TagManager importTagManager = new TagManager(new TagLibrary());
    TeiTagLibrarySerializationHandler teiHandler = new TeiTagLibrarySerializationHandler(importTagManager);
    TagLibrary importedTagsets = teiHandler.deserialize(null, inputStream);

    List<String> knownResourceIds = gitProjectHandler.getResourceIds();
    List<TagsetDefinitionImportStatus> importStatuses = new ArrayList<>();
    for (TagsetDefinition importedTagset : importedTagsets) {
        boolean knownInHistory = knownResourceIds.contains(importedTagset.getUuid());
        boolean presentInProject = false;
        if (knownInHistory) {
            presentInProject = getTagManager().getTagLibrary().getTagsetDefinition(importedTagset.getUuid()) != null;
        }
        TagsetDefinitionImportStatus status = new TagsetDefinitionImportStatus(importedTagset, knownInHistory, presentInProject);
        importStatuses.add(status);
    }

    String newCollectionId = idGenerator.generate();
    TeiUserMarkupCollectionDeserializer teiDeserializer = new TeiUserMarkupCollectionDeserializer(
        teiHandler.getTeiDocument(),
        importTagManager.getTagLibrary(),
        newCollectionId);
    AnnotationCollection loadedCollection = new AnnotationCollection(
        newCollectionId,
        teiHandler.getTeiDocument().getContentInfoSet(),
        importTagManager.getTagLibrary(),
        teiDeserializer.getTagReferences(),
        document.getUuid(),
        document.getRevisionHash());
    return new Pair<>(loadedCollection, importStatuses);
}
Also used : TagLibrary(de.catma.tag.TagLibrary) AnnotationCollection(de.catma.document.annotation.AnnotationCollection) TeiTagLibrarySerializationHandler(de.catma.serialization.tei.TeiTagLibrarySerializationHandler) ArrayList(java.util.ArrayList) TeiUserMarkupCollectionDeserializer(de.catma.serialization.tei.TeiUserMarkupCollectionDeserializer) TagsetDefinition(de.catma.tag.TagsetDefinition) TagManager(de.catma.tag.TagManager) TagsetDefinitionImportStatus(de.catma.serialization.TagsetDefinitionImportStatus) Pair(de.catma.util.Pair)

Aggregations

AnnotationCollection (de.catma.document.annotation.AnnotationCollection): 25; SourceDocument (de.catma.document.source.SourceDocument): 15; TagsetDefinition (de.catma.tag.TagsetDefinition): 14; IOException (java.io.IOException): 14; ArrayList (java.util.ArrayList): 12; AnnotationCollectionReference (de.catma.document.annotation.AnnotationCollectionReference): 11; TagReference (de.catma.document.annotation.TagReference): 11; List (java.util.List): 11; Pair (de.catma.util.Pair): 10; TagLibrary (de.catma.tag.TagLibrary): 9; TagDefinition (de.catma.tag.TagDefinition): 8; IDGenerator (de.catma.util.IDGenerator): 8; ContentInfoSet (de.catma.document.source.ContentInfoSet): 7; Property (de.catma.tag.Property): 7; TagInstance (de.catma.tag.TagInstance): 7; TagManager (de.catma.tag.TagManager): 7; URISyntaxException (java.net.URISyntaxException): 7; Collection (java.util.Collection): 7; Logger (java.util.logging.Logger): 7; Collectors (java.util.stream.Collectors): 7