Search in sources :

Example 31 with SourceDocument

use of de.catma.document.source.SourceDocument in project catma by forTEXT.

the class CommentQuery method execute.

/**
 * Searches the index for the comment phrase of this query and fills in the
 * matched text of every resulting row.
 *
 * <p>The phrase is split into ordered terms with a {@link WildcardTermExtractor},
 * the terms are handed to the {@link Indexer} of the current query options, and
 * each returned row then gets its phrase set to the content of its source
 * document at the row's range.
 *
 * @return the rows found by the indexer, each with its phrase set
 * @throws Exception if term extraction, the index search, or reading the
 *         document content fails
 */
@Override
protected QueryResult execute() throws Exception {
    QueryOptions options = getQueryOptions();

    // break the comment phrase down into the ordered list of search terms
    WildcardTermExtractor extractor = new WildcardTermExtractor(
            commentPhrase,
            options.getUnseparableCharacterSequences(),
            options.getUserDefinedSeparatingCharacters(),
            options.getLocale());
    List<String> orderedTerms = extractor.getOrderedTerms();

    QueryResult hits = options.getIndexer().searchCommentPhrase(
            options.getQueryId(),
            options.getRelevantSourceDocumentIDs(),
            orderedTerms,
            options.getLimit(),
            options.getUnseparableCharacterSequences(),
            options.getUserDefinedSeparatingCharacters(),
            options.getLocale());

    // the rows come back without their phrase, so read it from the document content
    Project project = options.getRepository();
    for (QueryResultRow hit : hits) {
        SourceDocument document = project.getSourceDocument(hit.getSourceDocumentId());
        hit.setPhrase(document.getContent(hit.getRange()));
    }

    return hits;
}
Also used : Project(de.catma.project.Project) QueryResult(de.catma.queryengine.result.QueryResult) Indexer(de.catma.indexer.Indexer) QueryResultRow(de.catma.queryengine.result.QueryResultRow) WildcardTermExtractor(de.catma.indexer.WildcardTermExtractor) SourceDocument(de.catma.document.source.SourceDocument)

Example 32 with SourceDocument

use of de.catma.document.source.SourceDocument in project catma by forTEXT.

the class JSONQueryResultBuilder method createJSONQueryResult.

/**
 * Converts query result rows into a JSON array holding one object per row.
 *
 * <p>Every object receives the common row fields, then either the tag specific
 * fields (once per merged range of a {@link TagQueryResultRow}) or the plain
 * start/end offsets of the row's range, and finally the size and title of the
 * row's source document.
 *
 * @param queryResult the rows to convert
 * @param project the project used to look up source documents and tag colors
 * @return an array node with one object node per row, in iteration order
 * @throws IOException if the source document info for a row cannot be loaded
 */
public ArrayNode createJSONQueryResult(final Iterable<QueryResultRow> queryResult, final Project project) throws IOException {
    // size and content metadata per source document ID, holding at most 10 entries
    LoadingCache<String, SourceDocInfo> docInfoByDocumentId = CacheBuilder.newBuilder().maximumSize(10).build(new CacheLoader<String, SourceDocInfo>() {

        @Override
        public SourceDocInfo load(String sourceDocumentId) throws Exception {
            SourceDocument document = project.getSourceDocument(sourceDocumentId);
            // unload again afterwards only if the document was not loaded when we got it
            boolean wasLoaded = document.isLoaded();
            try {
                long length = document.getLength();
                return new SourceDocInfo(length, document.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet());
            } finally {
                if (!wasLoaded) {
                    document.unload();
                }
            }
        }
    });

    // hex color string ("#" + hex value) per tag definition ID
    LoadingCache<String, String> hexColorByTagDefinitionId = CacheBuilder.newBuilder().build(new CacheLoader<String, String>() {

        @Override
        public String load(String tagDefinitionId) throws Exception {
            return "#" + ColorConverter.toHex(project.getTagManager().getTagLibrary().getTagDefinition(tagDefinitionId).getColor());
        }
    });

    JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
    ArrayNode jsonRows = nodeFactory.arrayNode();

    for (QueryResultRow row : queryResult) {
        ObjectNode jsonRow = nodeFactory.objectNode();
        addQueryResultRowFields(jsonRow, row);

        if (!(row instanceof TagQueryResultRow)) {
            jsonRow.put(Field.startOffset.name(), row.getRange().getStartPoint());
            jsonRow.put(Field.endOffset.name(), row.getRange().getEndPoint());
        } else {
            TagQueryResultRow tagRow = (TagQueryResultRow) row;
            // the ranges of the row are merged before the tag fields are written
            for (Range mergedRange : Range.mergeRanges(new TreeSet<>(tagRow.getRanges()))) {
                addTagQueryResultRowFields(jsonRow, tagRow, mergedRange, hexColorByTagDefinitionId);
            }
        }

        SourceDocInfo docInfo;
        try {
            docInfo = docInfoByDocumentId.get(row.getSourceDocumentId());
        } catch (ExecutionException e) {
            // the cache wraps loader failures, pass them on as the declared exception type
            throw new IOException(e);
        }

        jsonRow.put(Field.sourceDocumentSize.name(), docInfo.size);
        jsonRow.put(Field.sourceDocumentTitle.name(), docInfo.contentInfoSet.getTitle());
        jsonRows.add(jsonRow);
    }

    return jsonRows;
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) QueryResultRow(de.catma.queryengine.result.QueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) SourceDocument(de.catma.document.source.SourceDocument) IOException(java.io.IOException) Range(de.catma.document.Range) ExecutionException(java.util.concurrent.ExecutionException) JsonNodeFactory(com.fasterxml.jackson.databind.node.JsonNodeFactory) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode)

Example 33 with SourceDocument

use of de.catma.document.source.SourceDocument in project catma by forTEXT.

the class GitSourceDocumentHandlerTest method update.

// // how to test for exceptions: https://stackoverflow.com/a/31826781
// @Rule
// public ExpectedException thrown = ExpectedException.none();
// 
// @Test
// public void delete() throws Exception {
// try (ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
// 
// thrown.expect(IOException.class);
// thrown.expectMessage("Not implemented");
// gitSourceDocumentHandler.delete("fakeProjectId", "fakeSourceDocumentId");
// }
// }
// 
// @Test
// public void open() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
// 
// HashMap<String, Object> getJsonLdWebAnnotationResult = JsonLdWebAnnotationTest.getJsonLdWebAnnotation(
// jGitRepoManager, this.gitLabServerManager, this.catmaUser
// );
// 
// String projectId = (String)getJsonLdWebAnnotationResult.get("projectUuid");
// String sourceDocumentId = (String)getJsonLdWebAnnotationResult.get("sourceDocumentUuid");
// 
// this.projectsToDeleteOnTearDown.add(projectId);
// 
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
// 
// SourceDocument loadedSourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentId);
// 
// assertNotNull(loadedSourceDocument);
// assertEquals(
// "William Faulkner",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getAuthor()
// );
// assertEquals(
// "A Rose for Emily",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getTitle()
// );
// assertNotNull(loadedSourceDocument.getRevisionHash());
// }
// }
/**
 * Creates a source document in a fresh test project, adds it to the project
 * root repository as a submodule, changes its content info set and checks that
 * {@code GitSourceDocumentHandler.update} returns a revision and that the
 * {@code header.json} in the submodule contains the updated metadata.
 *
 * <p>All file streams are managed by try-with-resources so that they are closed
 * even when an assertion or one of the repository calls fails.
 *
 * @throws Exception if any of the file, repository or handler operations fails
 */
@Test
public void update() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");
    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);
    // read the converted text with a dedicated stream that is closed right away;
    // the create call below needs a fresh, unconsumed stream of its own
    String convertedSourceDocumentText;
    try (FileInputStream termSourceStream = new FileInputStream(convertedSourceDocument)) {
        convertedSourceDocumentText = IOUtils.toString(termSourceStream, techInfoSet.getCharset());
    }
    Map<String, List<TermInfo>> terms = new TermExtractor(convertedSourceDocumentText, new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";
    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            // a new stream is required here, otherwise an empty file would be written later on (FileInputStream does not support `reset`)
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
        (projectId) -> {
        }, mockBackgroundService, mockEventBus);
        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us
        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());
        GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(jGitRepoManager, gitlabManagerRestricted, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String revisionHash = gitSourceDocumentHandler.create(projectId, sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);
        // the JGitRepoManager instance should always be in a detached state after GitSourceDocumentHandler calls return
        assertFalse(jGitRepoManager.isAttached());
        // TODO: factor out a function that does all of the above
        jGitRepoManager.open(projectId, sourceDocumentUuid);
        jGitRepoManager.push(new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String remoteUri = jGitRepoManager.getRemoteUrl(null);
        jGitRepoManager.detach();
        // open the project root repository
        jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
        // create the submodule
        File targetSubmodulePath = Paths.get(jGitRepoManager.getRepositoryWorkTree().getAbsolutePath(), SOURCE_DOCUMENT_SUBMODULES_DIRECTORY_NAME, sourceDocumentUuid).toFile();
        // submodule files and the changed .gitmodules file are automatically staged
        jGitRepoManager.addSubmodule(targetSubmodulePath, remoteUri, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        jGitRepoManager.detach();
        // change the content metadata of the opened document and write it back
        SourceDocument sourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentUuid);
        sourceDocument.getSourceContentHandler().getSourceDocumentInfo().setContentInfoSet(new ContentInfoSet("William Faulkner (updated)", "Test description (new)", "Test publisher (new)", "A Rose for Emily (updated)"));
        String sourceDocumentRevision = gitSourceDocumentHandler.update(projectId, sourceDocument);
        assertNotNull(sourceDocumentRevision);
        String expectedSerializedSourceDocumentInfo = "" + "{\n" + "  \"gitContentInfoSet\": {\n" + "    \"author\": \"William Faulkner (updated)\",\n" + "    \"description\": \"Test description (new)\",\n" + "    \"publisher\": \"Test publisher (new)\",\n" + "    \"title\": \"A Rose for Emily (updated)\"\n" + "  },\n" + "  \"gitIndexInfoSet\": {\n" + "    \"locale\": \"en\",\n" + "    \"unseparableCharacterSequences\": [],\n" + "    \"userDefinedSeparatingCharacters\": []\n" + "  },\n" + "  \"gitTechInfoSet\": {\n" + "    \"charset\": \"UTF-8\",\n" + "    \"checksum\": 705211438,\n" + "    \"fileName\": null,\n" + "    \"fileOSType\": \"DOS\",\n" + "    \"fileType\": \"TEXT\",\n" + "    \"mimeType\": \"text/plain\",\n" + "    \"uri\": null\n" + "  }\n" + "}";
        assertEquals(expectedSerializedSourceDocumentInfo, FileUtils.readFileToString(new File(targetSubmodulePath, "header.json"), StandardCharsets.UTF_8));
    }
}
Also used : UsernamePasswordCredentialsProvider(org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider) BackgroundService(de.catma.backgroundservice.BackgroundService) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) ILocalGitRepositoryManager(de.catma.repository.git.interfaces.ILocalGitRepositoryManager) JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) SourceDocument(de.catma.document.source.SourceDocument) TermExtractor(de.catma.indexer.TermExtractor) EventBus(com.google.common.eventbus.EventBus) FileInputStream(java.io.FileInputStream) ContentInfoSet(de.catma.document.source.ContentInfoSet) IndexInfoSet(de.catma.document.source.IndexInfoSet) TechInfoSet(de.catma.document.source.TechInfoSet) File(java.io.File) IDGenerator(de.catma.util.IDGenerator) GitLabServerManagerTest(de.catma.repository.git.managers.GitLabServerManagerTest) Test(org.junit.jupiter.api.Test)

Example 34 with SourceDocument

use of de.catma.document.source.SourceDocument in project catma by forTEXT.

the class GitSourceDocumentHandlerTest method open.

/**
 * Opens a source document through {@code GitSourceDocumentHandler} and checks
 * that it comes back with the expected author and title and with a revision hash.
 *
 * <p>The project and document IDs are taken from the result of
 * {@code JsonLdWebAnnotationTest.getJsonLdWebAnnotation}.
 *
 * @throws Exception if the fixture setup or opening the document fails
 */
@Test
public void open() throws Exception {
    try (JGitRepoManager repoManager = new JGitRepoManager(catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), catmaUser)) {
        // register the repository base path for removal on tear down
        directoriesToDeleteOnTearDown.add(repoManager.getRepositoryBasePath());

        HashMap<String, Object> fixture = JsonLdWebAnnotationTest.getJsonLdWebAnnotation(repoManager, gitLabServerManager, catmaUser);
        String projectUuid = (String) fixture.get("projectUuid");
        String sourceDocumentUuid = (String) fixture.get("sourceDocumentUuid");
        projectsToDeleteOnTearDown.add(projectUuid);

        GitSourceDocumentHandler handler = new GitSourceDocumentHandler(repoManager, gitLabServerManager);
        SourceDocument document = handler.open(projectUuid, sourceDocumentUuid);

        assertNotNull(document);
        assertEquals("William Faulkner", document.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet().getAuthor());
        assertEquals("A Rose for Emily", document.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet().getTitle());
        assertNotNull(document.getRevisionHash());
    }
}
Also used : JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) SourceDocument(de.catma.document.source.SourceDocument) GitLabServerManagerTest(de.catma.repository.git.managers.GitLabServerManagerTest) Test(org.junit.Test) JsonLdWebAnnotationTest(de.catma.repository.git.serialization.models.json_ld.JsonLdWebAnnotationTest)

Example 35 with SourceDocument

use of de.catma.document.source.SourceDocument in project catma by forTEXT.

the class KwicPanel method handleRemoveAnnotationsRequest.

/**
 * Removes the annotations behind the rows currently selected in the KWIC grid.
 *
 * <p>Only rows that are {@link TagQueryResultRow}s are considered. A row is
 * processed when its document is part of the project, its collection reference
 * can be resolved and the user has write permission for that collection. All
 * other rows are skipped and the user is told why via notifications. The tag
 * instances of the processed rows are removed through an
 * {@link AnnotationCollectionManager} and the rows are dropped from the grid.
 *
 * <p>The notice about Documents/Collections no longer being part of the Project
 * is only shown when such resources are actually missing. If nothing can be
 * removed purely because of missing write permissions, the permission notice
 * shown before is the only (and accurate) message.
 *
 * @param eventBus the event bus of the caller; not used by this method
 */
private void handleRemoveAnnotationsRequest(EventBus eventBus) {
    final Set<QueryResultRow> selectedRows = kwicGrid.getSelectedItems();
    if (selectedRows.isEmpty()) {
        Notification.show("Info", "Please select one or more Annotation rows!", Type.HUMANIZED_MESSAGE);
        return;
    }
    int annotationRows = 0;
    List<AnnotationCollectionReference> annotationCollectionReferences = new ArrayList<>();
    boolean resourcesMissing = false;
    boolean permissionsMissing = false;
    Set<String> tagInstanceIdsToBeRemoved = new HashSet<>();
    Set<QueryResultRow> rowsToBeRemoved = new HashSet<>();
    try {
        // look up the write permission only once per collection
        LoadingCache<String, Boolean> collectionIdToHasWritePermission = CacheBuilder.newBuilder().build(new CacheLoader<String, Boolean>() {

            @Override
            public Boolean load(String collectionId) throws Exception {
                return project.hasPermission(project.getRoleForCollection(collectionId), RBACPermission.COLLECTION_WRITE);
            }
        });
        for (QueryResultRow row : selectedRows) {
            if (!(row instanceof TagQueryResultRow)) {
                // not an annotation row, reported below via the row count mismatch
                continue;
            }
            TagQueryResultRow tagRow = (TagQueryResultRow) row;
            annotationRows++;
            if (!project.hasDocument(row.getSourceDocumentId())) {
                resourcesMissing = true;
                continue;
            }
            SourceDocument document = project.getSourceDocument(row.getSourceDocumentId());
            AnnotationCollectionReference collRef = document.getUserMarkupCollectionReference(tagRow.getMarkupCollectionId());
            if (collRef == null) {
                resourcesMissing = true;
                continue;
            }
            if (collectionIdToHasWritePermission.get(collRef.getId())) {
                annotationCollectionReferences.add(collRef);
                tagInstanceIdsToBeRemoved.add(tagRow.getTagInstanceId());
                rowsToBeRemoved.add(row);
            } else {
                permissionsMissing = true;
            }
        }
        if (permissionsMissing) {
            Notification.show("Info", "You do not have the write permission for one or more Collections referenced by your selection. Those Collections will be ignored!", Type.HUMANIZED_MESSAGE);
        }
        if (annotationRows == 0) {
            Notification.show("Info", "Your selection does not contain any Annotations! Please select Annotations only!", Type.HUMANIZED_MESSAGE);
            return;
        }
        if (annotationCollectionReferences.isEmpty()) {
            // nothing left to remove; only claim missing resources when that is really the cause,
            // otherwise the permission notice above already explains why nothing happens
            if (resourcesMissing) {
                Notification.show("Info", "The Documents and/or Collections referenced by your selection are no longer part of the Project!", Type.HUMANIZED_MESSAGE);
            }
            return;
        }
        if (resourcesMissing) {
            Notification.show("Info", "Some of the Documents and/or Collections referenced by your selection " + "are no longer part of the Project and will be ignored, " + "see columns 'Document' and 'Collection' for details!", Type.HUMANIZED_MESSAGE);
        }
        if (annotationRows != selectedRows.size()) {
            Notification.show("Info", "Some rows of your selection do not represent Annotations and will be ignored, see column 'Tag' for details!", Type.HUMANIZED_MESSAGE);
        }
        AnnotationCollectionManager collectionManager = new AnnotationCollectionManager(project);
        for (AnnotationCollectionReference ref : annotationCollectionReferences) {
            collectionManager.add(project.getUserMarkupCollection(ref));
        }
        collectionManager.removeTagInstance(tagInstanceIdsToBeRemoved, true);
        // drop the removed annotations from the grid as well
        kwicDataProvider.getItems().removeAll(rowsToBeRemoved);
        kwicDataProvider.refreshAll();
    } catch (Exception e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("error deleting Annotations!", e);
    }
}
Also used : TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) ArrayList(java.util.ArrayList) SourceDocument(de.catma.document.source.SourceDocument) AnnotationCollectionReference(de.catma.document.annotation.AnnotationCollectionReference) AnnotationCollectionManager(de.catma.document.annotation.AnnotationCollectionManager) HashSet(java.util.HashSet)

Aggregations

SourceDocument (de.catma.document.source.SourceDocument)48 AnnotationCollectionReference (de.catma.document.annotation.AnnotationCollectionReference)23 Project (de.catma.project.Project)17 AnnotationCollection (de.catma.document.annotation.AnnotationCollection)15 List (java.util.List)15 TagsetDefinition (de.catma.tag.TagsetDefinition)13 IOException (java.io.IOException)13 Collectors (java.util.stream.Collectors)13 UI (com.vaadin.ui.UI)12 IDGenerator (de.catma.util.IDGenerator)12 HashSet (java.util.HashSet)12 ErrorHandler (de.catma.ui.module.main.ErrorHandler)11 EventBus (com.google.common.eventbus.EventBus)10 Subscribe (com.google.common.eventbus.Subscribe)10 CollectionChangeEvent (de.catma.project.event.CollectionChangeEvent)10 TreeDataProvider (com.vaadin.data.provider.TreeDataProvider)9 ChangeType (de.catma.project.event.ChangeType)9 RBACPermission (de.catma.rbac.RBACPermission)9 Pair (de.catma.util.Pair)9 Indexer (de.catma.indexer.Indexer)8