Search in sources :

Example 1 with IndexInfoSet

use of de.catma.document.source.IndexInfoSet in project catma by forTEXT.

From the class TeiDocument, method getIndexInfoSet:

/**
 * Assembles the {@link IndexInfoSet} for this document from the TEI header.
 *
 * @return a new index info set built from the header's unseparable character
 *         sequences, user defined separating characters and language
 */
public IndexInfoSet getIndexInfoSet() {
    TechnicalDescription technicalDescription = teiHeader.getTechnicalDescription();
    return new IndexInfoSet(
            technicalDescription.getUnseparableCharacterSequenceList(),
            technicalDescription.getUserDefinedSeparatingCharacterList(),
            teiHeader.getLanguage());
}
Also used : IndexInfoSet(de.catma.document.source.IndexInfoSet)

Example 2 with IndexInfoSet

use of de.catma.document.source.IndexInfoSet in project catma by forTEXT.

From the class GitProjectHandlerTest, method createSourceDocument:

// @Test
// public void delete() throws Exception {
// try (ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
// 
// GitProjectManager gitProjectHandler = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
// 
// 
// String projectId = gitProjectHandler.create(
// "Test CATMA Project", "This is a test CATMA project"
// );
// // we don't add the projectId to this.projectsToDeleteOnTearDown as this is the delete test
// 
// assertNotNull(projectId);
// assert projectId.startsWith("CATMA_");
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls
// // return
// assertFalse(jGitRepoManager.isAttached());
// 
// String expectedRootRepositoryName = GitProjectManager.getProjectRootRepositoryName(projectId);
// 
// File expectedRootRepositoryPath = new File(
// jGitRepoManager.getRepositoryBasePath(), expectedRootRepositoryName
// );
// 
// assert expectedRootRepositoryPath.exists();
// assert expectedRootRepositoryPath.isDirectory();
// 
// gitProjectHandler.delete(projectId);
// 
// assertFalse(expectedRootRepositoryPath.exists());
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls
// // return
// assertFalse(jGitRepoManager.isAttached());
// }
// }
// 
// @Test
// public void createTagset() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
// 
// GitProjectManager gitProjectManager = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
// 
// 
// String projectId = gitProjectManager.create(
// "Test CATMA Project",
// "This is a test CATMA project"
// );
// this.projectsToDeleteOnTearDown.add(projectId);
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
// 
// GitProjectHandler gitProjectHandler = new GitProjectHandler(null, projectId, jGitRepoManager, gitLabServerManager);
// 
// String tagsetId = gitProjectHandler.createTagset(
// 
// null,
// "Test Tagset",
// null
// );
// 
// assertNotNull(tagsetId);
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
// 
// jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
// Status status = jGitRepoManager.getGitApi().status().call();
// Set<String> added = status.getAdded();
// 
// assert status.hasUncommittedChanges();
// assert added.contains(".gitmodules");
// assert added.contains(String.format("%s/%s", GitProjectHandler.TAGSET_SUBMODULES_DIRECTORY_NAME, tagsetId));
// }
// }
// 
// @Test
// public void createMarkupCollection() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
// 
// GitProjectManager gitProjectManager = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
// 
// 
// String projectId = gitProjectManager.create(
// "Test CATMA Project",
// "This is a test CATMA project"
// );
// this.projectsToDeleteOnTearDown.add(projectId);
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
// 
// GitProjectHandler gitProjectHandler = new GitProjectHandler(null, projectId, jGitRepoManager, gitLabServerManager);
// 
// String markupCollectionId = gitProjectHandler.createMarkupCollection(
// null,
// "Test Markup Collection",
// null,
// "fakeSourceDocumentId",
// "fakeSourceDocumentVersion"
// );
// 
// assertNotNull(markupCollectionId);
// 
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
// 
// jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
// Status status = jGitRepoManager.getGitApi().status().call();
// Set<String> added = status.getAdded();
// 
// assert status.hasUncommittedChanges();
// assert added.contains(".gitmodules");
// assert added.contains(
// String.format(
// "%s/%s", GitProjectHandler.MARKUP_COLLECTION_SUBMODULES_DIRECTORY_NAME, markupCollectionId
// )
// );
// }
// }
/**
 * Creates a project, adds a source document to it through {@link GitProjectHandler} and then
 * checks the state of the project root repository: it must be clean and contain exactly one
 * commit whose committer identity and message match the restricted test user and the document title.
 *
 * @throws Exception if any file, Git or GitLab operation fails
 */
@Test
public void createSourceDocument() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");
    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);
    // read the converted text through its own short-lived stream so that it is reliably closed;
    // a fresh stream is opened below for the handler, because a consumed FileInputStream would
    // otherwise lead to an empty file being written (FileInputStream does not support `reset`)
    String convertedSourceDocumentText;
    try (FileInputStream termSourceStream = new FileInputStream(convertedSourceDocument)) {
        convertedSourceDocumentText = IOUtils.toString(termSourceStream, techInfoSet.getCharset());
    }
    Map<String, List<TermInfo>> terms = new TermExtractor(convertedSourceDocumentText, new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";
    // the streams are declared as resources so they are closed even when an assertion fails
    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            JGitRepoManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
        (projectId) -> {
        }, mockBackgroundService, mockEventBus);
        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us
        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());
        GitProjectHandler gitProjectHandler = new GitProjectHandler(gitlabManagerRestricted.getUser(), projectId, jGitRepoManager, gitlabManagerRestricted);
        // would usually happen when the project is opened via GraphWorktreeProject
        gitProjectHandler.loadRolesPerResource();
        String revisionHash = gitProjectHandler.createSourceDocument(sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);
        // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
        assertFalse(jGitRepoManager.isAttached());
        jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
        Status status = jGitRepoManager.getGitApi().status().call();
        // NOTE(review): plain `assert` statements are only evaluated when the JVM runs with -ea
        assert status.isClean();
        assertFalse(status.hasUncommittedChanges());
        Iterable<RevCommit> commits = jGitRepoManager.getGitApi().log().all().call();
        @SuppressWarnings("unchecked") List<RevCommit> commitsList = IteratorUtils.toList(commits.iterator());
        assertEquals(1, commitsList.size());
        // TODO: it would be good to check that the revision hash of the commit matches, however GitProjectHandler currently returns the revision hash
        // from the source document repo itself rather than from the root repo
        assertEquals(gitlabManagerRestricted.getUser().getIdentifier(), commitsList.get(0).getCommitterIdent().getName());
        assertEquals(gitlabManagerRestricted.getUser().getEmail(), commitsList.get(0).getCommitterIdent().getEmailAddress());
        assert commitsList.get(0).getFullMessage().contains(String.format("Added Document %s with ID", contentInfoSet.getTitle()));
    // TODO: add assertions for actual paths changed (see commented above - would need to be modified for already committed changes)
    }
}
Also used : Status(org.eclipse.jgit.api.Status) BackgroundService(de.catma.backgroundservice.BackgroundService) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) TermExtractor(de.catma.indexer.TermExtractor) EventBus(com.google.common.eventbus.EventBus) FileInputStream(java.io.FileInputStream) ContentInfoSet(de.catma.document.source.ContentInfoSet) IndexInfoSet(de.catma.document.source.IndexInfoSet) TechInfoSet(de.catma.document.source.TechInfoSet) File(java.io.File) IDGenerator(de.catma.util.IDGenerator) RevCommit(org.eclipse.jgit.revwalk.RevCommit) GitLabServerManagerTest(de.catma.repository.git.managers.GitLabServerManagerTest) Test(org.junit.jupiter.api.Test)

Example 3 with IndexInfoSet

use of de.catma.document.source.IndexInfoSet in project catma by forTEXT.

From the class GitSourceDocumentHandlerTest, method update:

// // how to test for exceptions: https://stackoverflow.com/a/31826781
// @Rule
// public ExpectedException thrown = ExpectedException.none();
// 
// @Test
// public void delete() throws Exception {
// try (ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
// 
// thrown.expect(IOException.class);
// thrown.expectMessage("Not implemented");
// gitSourceDocumentHandler.delete("fakeProjectId", "fakeSourceDocumentId");
// }
// }
// 
// @Test
// public void open() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
// 
// HashMap<String, Object> getJsonLdWebAnnotationResult = JsonLdWebAnnotationTest.getJsonLdWebAnnotation(
// jGitRepoManager, this.gitLabServerManager, this.catmaUser
// );
// 
// String projectId = (String)getJsonLdWebAnnotationResult.get("projectUuid");
// String sourceDocumentId = (String)getJsonLdWebAnnotationResult.get("sourceDocumentUuid");
// 
// this.projectsToDeleteOnTearDown.add(projectId);
// 
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
// 
// SourceDocument loadedSourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentId);
// 
// assertNotNull(loadedSourceDocument);
// assertEquals(
// "William Faulkner",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getAuthor()
// );
// assertEquals(
// "A Rose for Emily",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getTitle()
// );
// assertNotNull(loadedSourceDocument.getRevisionHash());
// }
// }
/**
 * Creates a source document, pushes it, adds it as a submodule of the project root repository,
 * then updates its content info set through {@link GitSourceDocumentHandler#update} and checks
 * that the {@code header.json} inside the submodule reflects the updated values.
 *
 * @throws Exception if any file, Git or GitLab operation fails
 */
@Test
public void update() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");
    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);
    // read the converted text through its own short-lived stream so that it is reliably closed;
    // a fresh stream is opened below for the handler, because a consumed FileInputStream would
    // otherwise lead to an empty file being written (FileInputStream does not support `reset`)
    String convertedSourceDocumentText;
    try (FileInputStream termSourceStream = new FileInputStream(convertedSourceDocument)) {
        convertedSourceDocumentText = IOUtils.toString(termSourceStream, techInfoSet.getCharset());
    }
    Map<String, List<TermInfo>> terms = new TermExtractor(convertedSourceDocumentText, new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";
    // the streams are declared as resources so they are closed even when an assertion fails
    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
        (projectId) -> {
        }, mockBackgroundService, mockEventBus);
        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us
        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());
        // the same credentials are needed for the handler, the push and the submodule creation
        UsernamePasswordCredentialsProvider credentialsProvider = new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword());
        GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(jGitRepoManager, gitlabManagerRestricted, credentialsProvider);
        String revisionHash = gitSourceDocumentHandler.create(projectId, sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);
        // the JGitRepoManager instance should always be in a detached state after GitSourceDocumentHandler calls return
        assertFalse(jGitRepoManager.isAttached());
        // TODO: factor out a function that does all of the above
        jGitRepoManager.open(projectId, sourceDocumentUuid);
        jGitRepoManager.push(credentialsProvider);
        String remoteUri = jGitRepoManager.getRemoteUrl(null);
        jGitRepoManager.detach();
        // open the project root repository
        jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
        // create the submodule
        File targetSubmodulePath = Paths.get(jGitRepoManager.getRepositoryWorkTree().getAbsolutePath(), SOURCE_DOCUMENT_SUBMODULES_DIRECTORY_NAME, sourceDocumentUuid).toFile();
        // submodule files and the changed .gitmodules file are automatically staged
        jGitRepoManager.addSubmodule(targetSubmodulePath, remoteUri, credentialsProvider);
        jGitRepoManager.detach();
        SourceDocument sourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentUuid);
        sourceDocument.getSourceContentHandler().getSourceDocumentInfo().setContentInfoSet(new ContentInfoSet("William Faulkner (updated)", "Test description (new)", "Test publisher (new)", "A Rose for Emily (updated)"));
        String sourceDocumentRevision = gitSourceDocumentHandler.update(projectId, sourceDocument);
        assertNotNull(sourceDocumentRevision);
        String expectedSerializedSourceDocumentInfo = ""
                + "{\n"
                + "  \"gitContentInfoSet\": {\n"
                + "    \"author\": \"William Faulkner (updated)\",\n"
                + "    \"description\": \"Test description (new)\",\n"
                + "    \"publisher\": \"Test publisher (new)\",\n"
                + "    \"title\": \"A Rose for Emily (updated)\"\n"
                + "  },\n"
                + "  \"gitIndexInfoSet\": {\n"
                + "    \"locale\": \"en\",\n"
                + "    \"unseparableCharacterSequences\": [],\n"
                + "    \"userDefinedSeparatingCharacters\": []\n"
                + "  },\n"
                + "  \"gitTechInfoSet\": {\n"
                + "    \"charset\": \"UTF-8\",\n"
                + "    \"checksum\": 705211438,\n"
                + "    \"fileName\": null,\n"
                + "    \"fileOSType\": \"DOS\",\n"
                + "    \"fileType\": \"TEXT\",\n"
                + "    \"mimeType\": \"text/plain\",\n"
                + "    \"uri\": null\n"
                + "  }\n"
                + "}";
        assertEquals(expectedSerializedSourceDocumentInfo, FileUtils.readFileToString(new File(targetSubmodulePath, "header.json"), StandardCharsets.UTF_8));
    }
}
Also used : UsernamePasswordCredentialsProvider(org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider) BackgroundService(de.catma.backgroundservice.BackgroundService) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) ILocalGitRepositoryManager(de.catma.repository.git.interfaces.ILocalGitRepositoryManager) JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) SourceDocument(de.catma.document.source.SourceDocument) TermExtractor(de.catma.indexer.TermExtractor) EventBus(com.google.common.eventbus.EventBus) FileInputStream(java.io.FileInputStream) ContentInfoSet(de.catma.document.source.ContentInfoSet) IndexInfoSet(de.catma.document.source.IndexInfoSet) TechInfoSet(de.catma.document.source.TechInfoSet) File(java.io.File) IDGenerator(de.catma.util.IDGenerator) GitLabServerManagerTest(de.catma.repository.git.managers.GitLabServerManagerTest) Test(org.junit.jupiter.api.Test)

Example 4 with IndexInfoSet

use of de.catma.document.source.IndexInfoSet in project catma by forTEXT.

From the class GitSourceDocumentHandlerTest, method create:

/**
 * Creates a source document through {@link GitSourceDocumentHandler#create} and checks that the
 * resulting local repository directory exists, contains byte-identical copies of the original and
 * converted documents, and holds a {@code header.json} with the expected serialized document info.
 *
 * @throws Exception if any file, Git or GitLab operation fails
 */
@Test
public void create() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");
    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);
    // read the converted text through its own short-lived stream so that it is reliably closed;
    // a fresh stream is opened below for the handler, because a consumed FileInputStream would
    // otherwise lead to an empty file being written (FileInputStream does not support `reset`)
    String convertedSourceDocumentText;
    try (FileInputStream termSourceStream = new FileInputStream(convertedSourceDocument)) {
        convertedSourceDocumentText = IOUtils.toString(termSourceStream, techInfoSet.getCharset());
    }
    Map<String, List<TermInfo>> terms = new TermExtractor(convertedSourceDocumentText, new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";
    // the streams are declared as resources so they are closed even when an assertion fails
    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
        (projectId) -> {
        }, mockBackgroundService, mockEventBus);
        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us
        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());
        GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(jGitRepoManager, gitlabManagerRestricted, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String revisionHash = gitSourceDocumentHandler.create(projectId, sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);
        // the JGitRepoManager instance should always be in a detached state after GitSourceDocumentHandler calls return
        assertFalse(jGitRepoManager.isAttached());
        File expectedRepoPath = Paths.get(jGitRepoManager.getRepositoryBasePath().getPath(), projectId, sourceDocumentUuid).toFile();
        // NOTE(review): plain `assert` statements are only evaluated when the JVM runs with -ea
        assert expectedRepoPath.exists();
        assert expectedRepoPath.isDirectory();
        // list the directory once; nothing below modifies it
        List<String> repoFileNames = Arrays.asList(expectedRepoPath.list());
        assert repoFileNames.contains("rose_for_emily.pdf");
        assert repoFileNames.contains("rose_for_emily.txt");
        assert FileUtils.contentEquals(originalSourceDocument, new File(expectedRepoPath, "rose_for_emily.pdf"));
        assert FileUtils.contentEquals(convertedSourceDocument, new File(expectedRepoPath, "rose_for_emily.txt"));
        assert repoFileNames.contains("header.json");
        String expectedSerializedSourceDocumentInfo = ""
                + "{\n"
                + "  \"gitContentInfoSet\": {\n"
                + "    \"author\": \"William Faulkner\",\n"
                + "    \"description\": \"\",\n"
                + "    \"publisher\": \"\",\n"
                + "    \"title\": \"A Rose for Emily\"\n"
                + "  },\n"
                + "  \"gitIndexInfoSet\": {\n"
                + "    \"locale\": \"en\",\n"
                + "    \"unseparableCharacterSequences\": [],\n"
                + "    \"userDefinedSeparatingCharacters\": []\n"
                + "  },\n"
                + "  \"gitTechInfoSet\": {\n"
                + "    \"charset\": \"UTF-8\",\n"
                + "    \"checksum\": 705211438,\n"
                + "    \"fileName\": null,\n"
                + "    \"fileOSType\": \"DOS\",\n"
                + "    \"fileType\": \"TEXT\",\n"
                + "    \"mimeType\": \"text/plain\",\n"
                + "    \"uri\": null\n"
                + "  }\n"
                + "}";
        assertEquals(expectedSerializedSourceDocumentInfo, FileUtils.readFileToString(new File(expectedRepoPath, "header.json"), StandardCharsets.UTF_8));
    }
}
Also used : UsernamePasswordCredentialsProvider(org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider) BackgroundService(de.catma.backgroundservice.BackgroundService) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) ILocalGitRepositoryManager(de.catma.repository.git.interfaces.ILocalGitRepositoryManager) JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) TermExtractor(de.catma.indexer.TermExtractor) EventBus(com.google.common.eventbus.EventBus) FileInputStream(java.io.FileInputStream) ContentInfoSet(de.catma.document.source.ContentInfoSet) IndexInfoSet(de.catma.document.source.IndexInfoSet) TechInfoSet(de.catma.document.source.TechInfoSet) File(java.io.File) IDGenerator(de.catma.util.IDGenerator) GitLabServerManagerTest(de.catma.repository.git.managers.GitLabServerManagerTest) Test(org.junit.jupiter.api.Test)

Example 5 with IndexInfoSet

use of de.catma.document.source.IndexInfoSet in project catma by forTEXT.

From the class JsonLdWebAnnotationTest, method getJsonLdWebAnnotation:

/**
 * Builds a test fixture for JSON-LD web annotation tests: creates a project, a tagset, a source document and a
 * markup collection, adds a tag definition to the tagset, commits/pushes the resulting changes, and finally
 * constructs a {@code JsonLdWebAnnotation} from two tag references that share one tag instance.
 *
 * <p>NB: the supplied {@code jGitRepoManager} is bound to a try-with-resources statement inside this method and
 * is therefore closed by the time this method returns.
 *
 * <p>The caller is responsible for cleanup, ie: it should register
 * {@code jGitRepoManager.getRepositoryBasePath()} in its directoriesToDeleteOnTearDown and the returned
 * 'projectUuid' in its projectsToDeleteOnTearDown.
 *
 * @param jGitRepoManager the local repository manager to work with (closed on return, see above)
 * @param gitLabServerManager the remote server manager; also supplies the credentials for the submodule push
 * @param catmaUser the user whose identifier is used to create the {@code GitProjectManager}
 * @return a {@code HashMap<String, Object>} with these keys:
 *         'jsonLdWebAnnotation' - for the JsonLdWebAnnotation object
 *         'projectUuid'
 *         --- following additional keys which are to be used when formatting EXPECTED_SERIALIZED_ANNOTATION ---:
 *         projectRootRepositoryName, tagsetDefinitionUuid, tagDefinitionUuid, userMarkupCollectionUuid,
 *         tagInstanceUuid, sourceDocumentUuid
 *         (no 'userPropertyDefinitionUuid' / 'systemPropertyDefinitionUuid' entries are put into the map)
 * @throws Exception if any of the project, repository or file operations fails
 */
public static HashMap<String, Object> getJsonLdWebAnnotation(JGitRepoManager jGitRepoManager, IRemoteGitServerManager gitLabServerManager, de.catma.user.User catmaUser) throws Exception {
    try (JGitRepoManager localJGitRepoManager = jGitRepoManager) {
        // caller should do the following:
        // this.directoriesToDeleteOnTearDown.add(localJGitRepoManager.getRepositoryBasePath());
        // create project
        GitProjectManager gitProjectManager = new GitProjectManager(RepositoryPropertyKey.GitBasedRepositoryBasePath.getValue(), UserIdentification.userToMap(catmaUser.getIdentifier()));
        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // caller should do the following:
        // this.projectsToDeleteOnTearDown.add(projectId);
        GitProjectHandler gitProjectHandler = new GitProjectHandler(null, projectId, jGitRepoManager, gitLabServerManager);
        // add new tagset to project
        String tagsetId = gitProjectHandler.createTagset(null, "Test Tagset", null);
        // add new source document to project
        File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
        File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");
        IndexInfoSet indexInfoSet = new IndexInfoSet();
        indexInfoSet.setLocale(Locale.ENGLISH);
        ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
        TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
        SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);
        String sourceDocumentId;
        // the streams are only needed for the duration of createSourceDocument; close them afterwards so that
        // the file handles are released even if the call throws
        try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
                FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument)) {
            sourceDocumentId = gitProjectHandler.createSourceDocument(null, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), null, null, sourceDocumentInfo);
        }
        // add new markup collection to project
        String markupCollectionId = gitProjectHandler.createMarkupCollection(null, "Test Markup Collection", null, sourceDocumentId, "fakeSourceDocumentVersion");
        // commit the changes to the project root repo (addition of tagset, source document and markup collection
        // submodules)
        String projectRootRepositoryName = GitProjectManager.getProjectRootRepositoryName(projectId);
        localJGitRepoManager.open(projectId, projectRootRepositoryName);
        localJGitRepoManager.commit(String.format("Adding new tagset %s, source document %s and markup collection %s", tagsetId, sourceDocumentId, markupCollectionId), "Test Committer", "testcommitter@catma.de");
        // can't call open on an attached instance
        localJGitRepoManager.detach();
        // construct TagDefinition object
        IDGenerator idGenerator = new IDGenerator();
        List<String> systemPropertyPossibleValues = Arrays.asList("SYSPROP_VAL_1", "SYSPROP_VAL_2");
        PropertyDefinition systemPropertyDefinition = new PropertyDefinition(PropertyDefinition.SystemPropertyName.catma_displaycolor.toString(), systemPropertyPossibleValues);
        List<String> userPropertyPossibleValues = Arrays.asList("UPROP_VAL_1", "UPROP_VAL_2");
        PropertyDefinition userPropertyDefinition = new PropertyDefinition("UPROP_DEF", userPropertyPossibleValues);
        String tagDefinitionUuid = idGenerator.generate();
        TagDefinition tagDefinition = new TagDefinition(null, tagDefinitionUuid, "TAG_DEF", new Version(), null, null, tagsetId);
        tagDefinition.addSystemPropertyDefinition(systemPropertyDefinition);
        tagDefinition.addUserDefinedPropertyDefinition(userPropertyDefinition);
        // call createTagDefinition
        // NB: in this case we know that the tagset submodule is on the master branch tip, ie: not in a detached
        // head state, so it's safe to make changes to the submodule and commit them
        // TODO: createTagDefinition should probably do some validation and fail fast if the tagset submodule is in
        // a detached head state - in that case the submodule would need to be updated first
        // see the "Updating a submodule in-place in the container" scenario at
        // https://medium.com/@porteneuve/mastering-git-submodules-34c65e940407
        GitTagsetHandler gitTagsetHandler = new GitTagsetHandler(localJGitRepoManager, gitLabServerManager);
        String returnedTagDefinitionId = gitTagsetHandler.createOrUpdateTagDefinition(projectId, tagsetId, tagDefinition);
        assertNotNull(returnedTagDefinitionId);
        // NB: this is the Java 'assert' keyword, which is only evaluated when assertions are enabled (-ea)
        assert returnedTagDefinitionId.startsWith("CATMA_");
        // the JGitRepoManager instance should always be in a detached state after GitTagsetHandler calls return
        assertFalse(localJGitRepoManager.isAttached());
        assertEquals(tagDefinitionUuid, returnedTagDefinitionId);
        // commit and push submodule changes (creation of tag definition)
        // TODO: add methods to JGitRepoManager to do this
        localJGitRepoManager.open(projectId, projectRootRepositoryName);
        Repository projectRootRepository = localJGitRepoManager.getGitApi().getRepository();
        String tagsetSubmodulePath = String.format("%s/%s", GitProjectHandler.TAGSET_SUBMODULES_DIRECTORY_NAME, tagsetId);
        // the submodule repository and its Git wrapper are separate resources and are both closed here, even if
        // add/commit/push throws
        try (Repository tagsetSubmoduleRepository = SubmoduleWalk.getSubmoduleRepository(projectRootRepository, tagsetSubmodulePath);
                Git submoduleGit = new Git(tagsetSubmoduleRepository)) {
            submoduleGit.add().addFilepattern(tagDefinitionUuid).call();
            submoduleGit.commit().setMessage(String.format("Adding tag definition %s", tagDefinitionUuid)).setCommitter("Test Committer", "testcommitter@catma.de").call();
            submoduleGit.push().setCredentialsProvider(new UsernamePasswordCredentialsProvider(gitLabServerManager.getUsername(), gitLabServerManager.getPassword())).call();
        }
        // commit and push project root repo changes (update of tagset submodule)
        localJGitRepoManager.getGitApi().add().addFilepattern(tagsetSubmodulePath).call();
        localJGitRepoManager.commit(String.format("Updating tagset %s", tagsetId), "Test Committer", "testcommitter@catma.de");
        // construct TagInstance object
        Property systemProperty = new Property(systemPropertyDefinition, Collections.singleton("SYSPROP_VAL_1"));
        Property userProperty = new Property(userPropertyDefinition, Collections.singleton("UPROP_VAL_2"));
        String tagInstanceUuid = idGenerator.generate();
        TagInstance tagInstance = new TagInstance(tagInstanceUuid, tagDefinition);
        tagInstance.addSystemProperty(systemProperty);
        tagInstance.addUserDefinedProperty(userProperty);
        // construct JsonLdWebAnnotation object
        String sourceDocumentUri = String.format("http://catma.de/gitlab/%s/%s/%s", projectRootRepositoryName, GitProjectHandler.SOURCE_DOCUMENT_SUBMODULES_DIRECTORY_NAME, sourceDocumentId);
        // two ranges in the source document that both reference the same tag instance
        Range range1 = new Range(12, 18);
        Range range2 = new Range(41, 47);
        List<TagReference> tagReferences = new ArrayList<>(Arrays.asList(new TagReference(tagInstance, sourceDocumentUri, range1, markupCollectionId), new TagReference(tagInstance, sourceDocumentUri, range2, markupCollectionId)));
        JsonLdWebAnnotation jsonLdWebAnnotation = new JsonLdWebAnnotation("http://catma.de/gitlab", projectId, tagReferences);
        // hand back the annotation together with the IDs that were generated along the way
        HashMap<String, Object> returnValue = new HashMap<>();
        returnValue.put("jsonLdWebAnnotation", jsonLdWebAnnotation);
        returnValue.put("projectRootRepositoryName", projectRootRepositoryName);
        returnValue.put("projectUuid", projectId);
        returnValue.put("tagsetDefinitionUuid", tagsetId);
        returnValue.put("tagDefinitionUuid", tagDefinitionUuid);
        returnValue.put("userMarkupCollectionUuid", markupCollectionId);
        returnValue.put("tagInstanceUuid", tagInstanceUuid);
        returnValue.put("sourceDocumentUuid", sourceDocumentId);
        return returnValue;
    }
}
Also used : TagDefinition(de.catma.tag.TagDefinition) SourceDocumentInfo(de.catma.document.source.SourceDocumentInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GitProjectManager(de.catma.repository.git.GitProjectManager) ContentInfoSet(de.catma.document.source.ContentInfoSet) IndexInfoSet(de.catma.document.source.IndexInfoSet) Version(de.catma.tag.Version) TechInfoSet(de.catma.document.source.TechInfoSet) Property(de.catma.tag.Property) GitTagsetHandler(de.catma.repository.git.GitTagsetHandler) UsernamePasswordCredentialsProvider(org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider) JGitRepoManager(de.catma.repository.git.managers.JGitRepoManager) Range(de.catma.document.Range) PropertyDefinition(de.catma.tag.PropertyDefinition) FileInputStream(java.io.FileInputStream) Repository(org.eclipse.jgit.lib.Repository) Git(org.eclipse.jgit.api.Git) TagInstance(de.catma.tag.TagInstance) GitProjectHandler(de.catma.repository.git.GitProjectHandler) TagReference(de.catma.document.standoffmarkup.usermarkup.TagReference) File(java.io.File) IDGenerator(de.catma.util.IDGenerator)

Aggregations

IndexInfoSet (de.catma.document.source.IndexInfoSet)8 SourceDocumentInfo (de.catma.document.source.SourceDocumentInfo)7 TechInfoSet (de.catma.document.source.TechInfoSet)7 ContentInfoSet (de.catma.document.source.ContentInfoSet)6 File (java.io.File)6 FileInputStream (java.io.FileInputStream)6 IDGenerator (de.catma.util.IDGenerator)5 JGitRepoManager (de.catma.repository.git.managers.JGitRepoManager)4 EventBus (com.google.common.eventbus.EventBus)3 BackgroundService (de.catma.backgroundservice.BackgroundService)3 TermExtractor (de.catma.indexer.TermExtractor)3 GitLabServerManagerTest (de.catma.repository.git.managers.GitLabServerManagerTest)3 UsernamePasswordCredentialsProvider (org.eclipse.jgit.transport.UsernamePasswordCredentialsProvider)3 Test (org.junit.jupiter.api.Test)3 SourceDocument (de.catma.document.source.SourceDocument)2 XML2ContentHandler (de.catma.document.source.contenthandler.XML2ContentHandler)2 ILocalGitRepositoryManager (de.catma.repository.git.interfaces.ILocalGitRepositoryManager)2 Property (de.catma.tag.Property)2 PropertyDefinition (de.catma.tag.PropertyDefinition)2 TagDefinition (de.catma.tag.TagDefinition)2