Search in sources :

Example 1 with SourceDocument

use of de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument in project webanno by webanno.

the class ExportUtil method exportProjectSettings.

/**
 * Build the exportable model of a project from the data held by the given services.
 *
 * <p>The result contains the project's basic properties, its annotation layers (with attach
 * type and attach feature resolved to their exported counterparts), tag sets and tags, source
 * documents, annotation documents, project permissions and, when an automation service is
 * available, training documents and Mira templates. Without an automation service the training
 * document and Mira template lists are set to empty lists.
 *
 * @param annotationService
 *            provides the layers, tag sets and tags of the project
 * @param automationService
 *            optional provider of training documents and Mira templates
 * @param documentService
 *            provides the source documents and their annotation documents
 * @param projectService
 *            provides the project users and their permission levels
 * @param aProject
 *            the project to export
 * @param aProjectSettings
 *            not read by this method
 * @param aExportTempDir
 *            not read by this method
 * @return the populated export model
 */
public static de.tudarmstadt.ukp.clarin.webanno.export.model.Project exportProjectSettings(AnnotationSchemaService annotationService, Optional<AutomationService> automationService, DocumentService documentService, ProjectService projectService, Project aProject, File aProjectSettings, File aExportTempDir) {
    de.tudarmstadt.ukp.clarin.webanno.export.model.Project exported = new de.tudarmstadt.ukp.clarin.webanno.export.model.Project();
    exported.setDescription(aProject.getDescription());
    exported.setName(aProject.getName());
    // Older WebAnno versions stored the mode as an enum that was serialized upper-case on
    // export but lower-case in the database; upper-casing here compensates for that.
    exported.setMode(StringUtils.upperCase(aProject.getMode(), Locale.US));
    exported.setScriptDirection(aProject.getScriptDirection());
    exported.setVersion(aProject.getVersion());
    exported.setDisableExport(aProject.isDisableExport());
    exported.setCreated(aProject.getCreated());
    exported.setUpdated(aProject.getUpdated());

    // ---- Layers ----
    // The two maps remember which exported layer/feature belongs to which model
    // layer/feature so that attach types and attach features can be wired up afterwards.
    Map<AnnotationLayer, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> exportedLayerByLayer = new HashMap<>();
    Map<AnnotationFeature, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> featureToExFeatures = new HashMap<>();
    List<de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> exportedLayers = new ArrayList<>();
    for (AnnotationLayer modelLayer : annotationService.listAnnotationLayer(aProject)) {
        exportedLayers.add(ImportUtil.exportLayerDetails(exportedLayerByLayer, featureToExFeatures, modelLayer, annotationService));
    }
    // Second pass: now that every layer/feature has an exported twin, resolve the references.
    for (Map.Entry<AnnotationLayer, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer> pair : exportedLayerByLayer.entrySet()) {
        AnnotationLayer modelLayer = pair.getKey();
        de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationLayer exportedLayer = pair.getValue();
        if (modelLayer.getAttachType() != null) {
            exportedLayer.setAttachType(exportedLayerByLayer.get(modelLayer.getAttachType()));
        }
        if (modelLayer.getAttachFeature() != null) {
            exportedLayer.setAttachFeature(featureToExFeatures.get(modelLayer.getAttachFeature()));
        }
    }
    exported.setLayers(exportedLayers);

    // ---- Tag sets and their tags ----
    List<ExportedTagSet> exportedTagSets = new ArrayList<>();
    for (TagSet modelTagSet : annotationService.listTagSets(aProject)) {
        ExportedTagSet exportedTagSet = new ExportedTagSet();
        exportedTagSet.setCreateTag(modelTagSet.isCreateTag());
        exportedTagSet.setDescription(modelTagSet.getDescription());
        exportedTagSet.setLanguage(modelTagSet.getLanguage());
        exportedTagSet.setName(modelTagSet.getName());
        List<ExportedTag> exportedTags = new ArrayList<>();
        for (Tag modelTag : annotationService.listTags(modelTagSet)) {
            ExportedTag exportedTag = new ExportedTag();
            exportedTag.setDescription(modelTag.getDescription());
            exportedTag.setName(modelTag.getName());
            exportedTags.add(exportedTag);
        }
        exportedTagSet.setTags(exportedTags);
        exportedTagSets.add(exportedTagSet);
    }
    exported.setTagSets(exportedTagSets);

    // ---- Source documents and the annotation documents that belong to them ----
    List<SourceDocument> exportedSourceDocs = new ArrayList<>();
    List<AnnotationDocument> exportedAnnotationDocs = new ArrayList<>();
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> modelSourceDocs = documentService.listSourceDocuments(aProject);
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument modelSourceDoc : modelSourceDocs) {
        SourceDocument exportedSourceDoc = new SourceDocument();
        exportedSourceDoc.setFormat(modelSourceDoc.getFormat());
        exportedSourceDoc.setName(modelSourceDoc.getName());
        exportedSourceDoc.setState(modelSourceDoc.getState());
        exportedSourceDoc.setTimestamp(modelSourceDoc.getTimestamp());
        exportedSourceDoc.setSentenceAccessed(modelSourceDoc.getSentenceAccessed());
        exportedSourceDoc.setCreated(modelSourceDoc.getCreated());
        exportedSourceDoc.setUpdated(modelSourceDoc.getUpdated());
        for (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument modelAnnotationDoc : documentService.listAnnotationDocuments(modelSourceDoc)) {
            AnnotationDocument exportedAnnotationDoc = new AnnotationDocument();
            exportedAnnotationDoc.setName(modelAnnotationDoc.getName());
            exportedAnnotationDoc.setState(modelAnnotationDoc.getState());
            exportedAnnotationDoc.setUser(modelAnnotationDoc.getUser());
            exportedAnnotationDoc.setTimestamp(modelAnnotationDoc.getTimestamp());
            exportedAnnotationDoc.setSentenceAccessed(modelAnnotationDoc.getSentenceAccessed());
            exportedAnnotationDoc.setCreated(modelAnnotationDoc.getCreated());
            exportedAnnotationDoc.setUpdated(modelAnnotationDoc.getUpdated());
            exportedAnnotationDocs.add(exportedAnnotationDoc);
        }
        exportedSourceDocs.add(exportedSourceDoc);
    }
    exported.setSourceDocuments(exportedSourceDocs);
    exported.setAnnotationDocuments(exportedAnnotationDocs);

    // ---- Training documents (only available with an automation service) ----
    if (!automationService.isPresent()) {
        exported.setTrainingDocuments(new ArrayList<>());
    } else {
        List<de.tudarmstadt.ukp.clarin.webanno.export.model.TrainingDocument> exportedTrainingDocs = new ArrayList<>();
        List<TrainingDocument> modelTrainingDocs = automationService.get().listTrainingDocuments(aProject);
        // Index the exported features by name; training documents refer to their feature by name.
        Map<String, de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> exportedFeatureByName = new HashMap<>();
        for (de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature exportedFeature : featureToExFeatures.values()) {
            exportedFeatureByName.put(exportedFeature.getName(), exportedFeature);
        }
        for (TrainingDocument modelTrainingDoc : modelTrainingDocs) {
            de.tudarmstadt.ukp.clarin.webanno.export.model.TrainingDocument exportedTrainingDoc = new de.tudarmstadt.ukp.clarin.webanno.export.model.TrainingDocument();
            exportedTrainingDoc.setFormat(modelTrainingDoc.getFormat());
            exportedTrainingDoc.setName(modelTrainingDoc.getName());
            exportedTrainingDoc.setState(modelTrainingDoc.getState());
            exportedTrainingDoc.setTimestamp(modelTrainingDoc.getTimestamp());
            exportedTrainingDoc.setSentenceAccessed(modelTrainingDoc.getSentenceAccessed());
            if (modelTrainingDoc.getFeature() != null) {
                exportedTrainingDoc.setFeature(exportedFeatureByName.get(modelTrainingDoc.getFeature().getName()));
            }
            exportedTrainingDocs.add(exportedTrainingDoc);
        }
        exported.setTrainingDocuments(exportedTrainingDocs);
    }

    // ---- Project permissions: one entry per (user, permission level) pair ----
    List<ProjectPermission> exportedPermissions = new ArrayList<>();
    for (User projectUser : projectService.listProjectUsersWithPermissions(aProject)) {
        for (de.tudarmstadt.ukp.clarin.webanno.model.ProjectPermission modelPermission : projectService.listProjectPermissionLevel(projectUser, aProject)) {
            ProjectPermission exportedPermission = new ProjectPermission();
            exportedPermission.setLevel(modelPermission.getLevel());
            exportedPermission.setUser(projectUser.getUsername());
            exportedPermissions.add(exportedPermission);
        }
    }
    exported.setProjectPermissions(exportedPermissions);

    // ---- Automation Mira templates (only available with an automation service) ----
    if (!automationService.isPresent()) {
        exported.setMiraTemplates(new ArrayList<>());
    } else {
        List<de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate> exportedTemplates = new ArrayList<>();
        for (MiraTemplate modelTemplate : automationService.get().listMiraTemplates(aProject)) {
            de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate exportedTemplate = new de.tudarmstadt.ukp.clarin.webanno.export.model.MiraTemplate();
            exportedTemplate.setAnnotateAndPredict(modelTemplate.isAnnotateAndRepeat());
            exportedTemplate.setAutomationStarted(modelTemplate.isAutomationStarted());
            exportedTemplate.setCurrentLayer(modelTemplate.isCurrentLayer());
            exportedTemplate.setResult(modelTemplate.getResult());
            exportedTemplate.setTrainFeature(featureToExFeatures.get(modelTemplate.getTrainFeature()));
            // The "other features" set is only populated on the export model when non-empty.
            if (!modelTemplate.getOtherFeatures().isEmpty()) {
                Set<de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationFeature> exportedOtherFeatures = new HashSet<>();
                for (AnnotationFeature otherFeature : modelTemplate.getOtherFeatures()) {
                    exportedOtherFeatures.add(featureToExFeatures.get(otherFeature));
                }
                exportedTemplate.setOtherFeatures(exportedOtherFeatures);
            }
            exportedTemplates.add(exportedTemplate);
        }
        exported.setMiraTemplates(exportedTemplates);
    }
    return exported;
}
Also used : Mode(de.tudarmstadt.ukp.clarin.webanno.model.Mode) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationDocument) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer) ExportedTagSet(de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTagSet) TagSet(de.tudarmstadt.ukp.clarin.webanno.model.TagSet) ExportedTagSet(de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTagSet) ProjectPermission(de.tudarmstadt.ukp.clarin.webanno.export.model.ProjectPermission) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) HashSet(java.util.HashSet) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) MiraTemplate(de.tudarmstadt.ukp.clarin.webanno.automation.model.MiraTemplate) ExportedTag(de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTag) Tag(de.tudarmstadt.ukp.clarin.webanno.model.Tag) ExportedTag(de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTag) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Example 2 with SourceDocument

use of de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument in project webanno by webanno.

the class ExportUtil method exportAnnotationDocuments.

/**
 * Copy annotation documents as serialized CAS from the file system of this project to the
 * export folder, and additionally export each of them in a secondary format.
 *
 * <p>For every source document of the project this method:
 * <ul>
 * <li>makes sure the initial CAS exists and copies it to the serialized-CAS folder;</li>
 * <li>for every annotation document whose user is known to the user repository and whose
 * state is neither NEW nor IGNORE, copies the user's CAS file and a rendering of it in the
 * selected format;</li>
 * <li>for automation/correction projects, copies the CORRECTION_USER CAS and a rendering of it
 * to the curation folders.</li>
 * </ul>
 * The progress stored in the request is advanced by up to 80 points over all documents.
 *
 * @param documentService
 *            gives access to source documents, annotation documents and CAS files
 * @param importExportService
 *            resolves the writer for the requested format and performs the secondary export
 * @param userRepository
 *            used to skip annotation documents of users that no longer exist
 * @param aModel
 *            the export request (project, format, progress and messages)
 * @param aCopyDir
 *            the folder below which the exported files are placed
 * @throws IOException
 *             if a directory cannot be created or a file cannot be copied or deleted
 * @throws UIMAException
 *             declared by the services used; raised on CAS-related failures
 * @throws ClassNotFoundException
 *             declared by the services used
 */
public static void exportAnnotationDocuments(DocumentService documentService, ImportExportService importExportService, UserDao userRepository, ProjectExportRequest aModel, File aCopyDir) throws IOException, UIMAException, ClassNotFoundException {
    Project project = aModel.project.getObject();
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService.listSourceDocuments(project);
    int i = 1;
    int initProgress = aModel.progress;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        //
        // Export initial CASes
        //
        // The initial CAS must always be exported to ensure that the converted source document
        // will *always* have the state it had at the time of the initial import. If we did not
        // have a reliable initial CAS and instead lazily converted whenever an annotator starts
        // annotating, then we could end up with two annotators having two different versions of
        // their CAS, e.g. if there was a code change in the reader component that affects its
        // output.
        // If the initial CAS does not exist yet, it must be created before export.
        documentService.createOrReadInitialCas(sourceDocument);
        File targetDir = new File(aCopyDir.getAbsolutePath() + ANNOTATION_CAS_FOLDER + sourceDocument.getName());
        FileUtils.forceMkdir(targetDir);
        File initialCasFile = documentService.getCasFile(sourceDocument, INITIAL_CAS_PSEUDO_USER);
        FileUtils.copyFileToDirectory(initialCasFile, targetDir);
        LOG.info("Exported annotation document content for user [" + INITIAL_CAS_PSEUDO_USER + "] for source document [" + sourceDocument.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
        //
        // Export per-user annotation document
        //
        // Determine which format to use for export
        String formatId;
        if (FORMAT_AUTO.equals(aModel.format)) {
            formatId = sourceDocument.getFormat();
        } else {
            formatId = importExportService.getWritableFormatId(aModel.format);
        }
        Class<?> writer = importExportService.getWritableFormats().get(formatId);
        if (writer == null) {
            String msg = "[" + sourceDocument.getName() + "] No writer found for format [" + formatId + "] - exporting as WebAnno TSV instead.";
            // Avoid repeating the same message over for different users
            if (!aModel.messages.contains(msg)) {
                aModel.messages.add(msg);
            }
            writer = WebannoTsv3XWriter.class;
        }
        // From here on "writer" is never null thanks to the TSV fallback above.
        // Export annotations from regular users
        for (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument annotationDocument : documentService.listAnnotationDocuments(sourceDocument)) {
            // Only export if the user still exists and the annotation document is not NEW/IGNORE
            if (userRepository.get(annotationDocument.getUser()) == null || annotationDocument.getState().equals(AnnotationDocumentState.NEW) || annotationDocument.getState().equals(AnnotationDocumentState.IGNORE)) {
                continue;
            }
            // The serialized CAS goes to "targetDir", which was already created above for the
            // initial CAS; only the folder for the secondary format still has to be created.
            File annotationDocumentDir = new File(aCopyDir.getAbsolutePath() + ANNOTATION_ORIGINAL_FOLDER + sourceDocument.getName());
            FileUtils.forceMkdir(annotationDocumentDir);
            File annotationFileAsSerialisedCas = documentService.getCasFile(sourceDocument, annotationDocument.getUser());
            if (annotationFileAsSerialisedCas.exists()) {
                File annotationFile = importExportService.exportAnnotationDocument(sourceDocument, annotationDocument.getUser(), writer, annotationDocument.getUser(), Mode.ANNOTATION, false);
                // Copy CAS - this is used when importing the project again
                FileUtils.copyFileToDirectory(annotationFileAsSerialisedCas, targetDir);
                // Copy secondary export format and remove the temporary export file
                FileUtils.copyFileToDirectory(annotationFile, annotationDocumentDir);
                FileUtils.forceDelete(annotationFile);
            }
            LOG.info("Exported annotation document content for user [" + annotationDocument.getUser() + "] for source document [" + sourceDocument.getId() + "] in project [" + project.getName() + "] with id [" + project.getId() + "]");
        }
        // For automation/correction projects, also export the CORRECTION_USER document. It is
        // written to the same folders that are used for curation documents.
        // FIXME #1224 CURATION_USER and CORRECTION_USER files should be exported in
        // annotation_ser
        if (WebAnnoConst.PROJECT_TYPE_AUTOMATION.equals(project.getMode()) || WebAnnoConst.PROJECT_TYPE_CORRECTION.equals(project.getMode())) {
            File correctionCasFile = documentService.getCasFile(sourceDocument, CORRECTION_USER);
            if (correctionCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                File curationCasDir = new File(aCopyDir + CURATION_AS_SERIALISED_CAS + sourceDocument.getName());
                FileUtils.forceMkdir(curationCasDir);
                FileUtils.copyFileToDirectory(correctionCasFile, curationCasDir);
                // Copy secondary export format for convenience - not used during import
                File curationDir = new File(aCopyDir + CURATION_FOLDER + sourceDocument.getName());
                FileUtils.forceMkdir(curationDir);
                File correctionFile = importExportService.exportAnnotationDocument(sourceDocument, CORRECTION_USER, writer, CORRECTION_USER, Mode.CORRECTION);
                FileUtils.copyFileToDirectory(correctionFile, curationDir);
                FileUtils.forceDelete(correctionFile);
            }
        }
        aModel.progress = initProgress + (int) Math.ceil(((double) i) / documents.size() * 80.0);
        i++;
    }
}
Also used : Mode(de.tudarmstadt.ukp.clarin.webanno.model.Mode) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) File(java.io.File)

Example 3 with SourceDocument

use of de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument in project webanno by webanno.

the class ExportUtil method exportSourceDocuments.

/**
 * Copy source documents from the file system of this project to the export folder.
 *
 * <p>The documents are copied into the source folder below {@code aCopyDir}. After each
 * successfully copied document the progress of the request is updated (up to 10 points over
 * all documents). The export is aborted on the first source file that cannot be found.
 *
 * @param documentService
 *            gives access to the source documents and their files
 * @param model
 *            the export request whose progress and messages are updated
 * @param aProject
 *            the project whose source documents are exported
 * @param aCopyDir
 *            the folder below which the source folder is created
 * @throws IOException
 *             if the target folder cannot be created or a file cannot be copied
 * @throws ProjectExportException
 *             if a source file is missing; the original {@link FileNotFoundException} is
 *             attached as the cause
 */
public static void exportSourceDocuments(DocumentService documentService, ProjectExportRequest model, Project aProject, File aCopyDir) throws IOException, ProjectExportException {
    File sourceDocumentDir = new File(aCopyDir + SOURCE_FOLDER);
    FileUtils.forceMkdir(sourceDocumentDir);
    // Get all the source documents from the project
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService.listSourceDocuments(aProject);
    int i = 1;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        try {
            FileUtils.copyFileToDirectory(documentService.getSourceDocumentFile(sourceDocument), sourceDocumentDir);
        } catch (FileNotFoundException e) {
            String errorMessage = "Source file '" + sourceDocument.getName() + "' related to project couldn't be located in repository";
            // Log the exception itself: a root-cause lookup may yield null when the exception
            // has no nested cause, which would lose the stack trace.
            LOG.error(errorMessage, e);
            model.messages.add(errorMessage);
            // Keep the original exception as cause so callers can see which file was missing.
            ProjectExportException failure = new ProjectExportException("Couldn't find some source file(s) related to project");
            failure.initCause(e);
            throw failure;
        }
        model.progress = (int) Math.ceil(((double) i) / documents.size() * 10.0);
        i++;
        LOG.info("Exported content for source document [" + sourceDocument.getId() + "] in project [" + aProject.getName() + "] with id [" + aProject.getId() + "]");
    }
}
Also used : Mode(de.tudarmstadt.ukp.clarin.webanno.model.Mode) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument) FileNotFoundException(java.io.FileNotFoundException) File(java.io.File)

Example 4 with SourceDocument

use of de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument in project webanno by webanno.

the class ExportUtil method exportCuratedDocuments.

/**
 * Copy, if they exist, curation documents to a folder that will be exported as Zip file.
 *
 * <p>For every source document the serialized-CAS and secondary-format curation folders are
 * created. The curation CAS is then copied, together with a rendering in the selected format,
 * if the document's curation is finished or, when {@code aIncludeInProgress} is set, still in
 * progress. If the format is "auto" or no writer is found for it, WebAnno TSV is used.
 *
 * @param documentService
 *            gives access to the source documents and their CAS files
 * @param importExportService
 *            resolves the writer for the requested format and performs the secondary export
 * @param aModel
 *            the export request (project, format and progress)
 * @param aCopyDir
 *            The folder where curated documents are copied to be exported as Zip File
 * @param aIncludeInProgress
 *            whether documents whose curation is still in progress are exported as well
 * @throws UIMAException
 *             declared by the services used
 * @throws IOException
 *             if a folder cannot be created or the curation CAS cannot be copied
 * @throws ClassNotFoundException
 *             declared by the services used
 * @throws ProjectExportException
 *             if the secondary export of a curation document fails; the underlying exception
 *             is attached as the cause
 */
public static void exportCuratedDocuments(DocumentService documentService, ImportExportService importExportService, ProjectExportRequest aModel, File aCopyDir, boolean aIncludeInProgress) throws UIMAException, IOException, ClassNotFoundException, ProjectExportException {
    Project project = aModel.project.getObject();
    // Get all the source documents from the project
    List<de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument> documents = documentService.listSourceDocuments(project);
    // Determine which format to use for export.
    Class<?> writer;
    if (FORMAT_AUTO.equals(aModel.format)) {
        writer = WebannoTsv3XWriter.class;
    } else {
        writer = importExportService.getWritableFormats().get(importExportService.getWritableFormatId(aModel.format));
        if (writer == null) {
            writer = WebannoTsv3XWriter.class;
        }
    }
    int initProgress = aModel.progress - 1;
    int i = 1;
    for (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument sourceDocument : documents) {
        File curationCasDir = new File(aCopyDir + CURATION_AS_SERIALISED_CAS + sourceDocument.getName());
        FileUtils.forceMkdir(curationCasDir);
        File curationDir = new File(aCopyDir + CURATION_FOLDER + sourceDocument.getName());
        FileUtils.forceMkdir(curationDir);
        // Export the documents whose curation is finished and, if requested, also the ones
        // that are still in progress
        if ((aIncludeInProgress && SourceDocumentState.CURATION_IN_PROGRESS.equals(sourceDocument.getState())) || SourceDocumentState.CURATION_FINISHED.equals(sourceDocument.getState())) {
            File curationCasFile = documentService.getCasFile(sourceDocument, WebAnnoConst.CURATION_USER);
            if (curationCasFile.exists()) {
                // Copy CAS - this is used when importing the project again
                FileUtils.copyFileToDirectory(curationCasFile, curationCasDir);
                // Copy secondary export format for convenience - not used during import
                try {
                    File curationFile = importExportService.exportAnnotationDocument(sourceDocument, WebAnnoConst.CURATION_USER, writer, WebAnnoConst.CURATION_USER, Mode.CURATION);
                    FileUtils.copyFileToDirectory(curationFile, curationDir);
                    FileUtils.forceDelete(curationFile);
                } catch (Exception e) {
                    // Any failure here aborts the export; keep the original exception as the
                    // cause so the actual reason is not lost.
                    ProjectExportException failure = new ProjectExportException("Aborting due to unrecoverable error while exporting!");
                    failure.initCause(e);
                    throw failure;
                }
            }
        }
        aModel.progress = initProgress + (int) Math.ceil(((double) i) / documents.size() * 10.0);
        i++;
    }
}
Also used : Mode(de.tudarmstadt.ukp.clarin.webanno.model.Mode) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument) UIMAException(org.apache.uima.UIMAException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) File(java.io.File)

Aggregations

SourceDocument (de.tudarmstadt.ukp.clarin.webanno.export.model.SourceDocument)4 Mode (de.tudarmstadt.ukp.clarin.webanno.model.Mode)4 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)3 File (java.io.File)3 FileNotFoundException (java.io.FileNotFoundException)2 MiraTemplate (de.tudarmstadt.ukp.clarin.webanno.automation.model.MiraTemplate)1 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.export.model.AnnotationDocument)1 ExportedTag (de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTag)1 ExportedTagSet (de.tudarmstadt.ukp.clarin.webanno.export.model.ExportedTagSet)1 ProjectPermission (de.tudarmstadt.ukp.clarin.webanno.export.model.ProjectPermission)1 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)1 AnnotationLayer (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)1 Tag (de.tudarmstadt.ukp.clarin.webanno.model.Tag)1 TagSet (de.tudarmstadt.ukp.clarin.webanno.model.TagSet)1 TrainingDocument (de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)1 User (de.tudarmstadt.ukp.clarin.webanno.security.model.User)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1