Use of de.catma.indexer.TermExtractor in the project catma by forTEXT.
Class Phrase, method execute.
/**
 * Runs the phrase query: the phrase is split into its ordered terms with a
 * {@link TermExtractor} configured from the current query options, and the
 * phrase together with those terms is handed to the indexer's phrase search.
 *
 * @return the result produced by the indexer's {@code searchPhrase} call
 * @throws Exception if term extraction or the index search fails
 */
@Override
protected QueryResult execute() throws Exception {
    QueryOptions queryOptions = getQueryOptions();

    // Tokenize the phrase with the same separator settings the query options carry.
    List<String> orderedTerms = new TermExtractor(
            phrase,
            queryOptions.getUnseparableCharacterSequences(),
            queryOptions.getUserDefinedSeparatingCharacters(),
            queryOptions.getLocale()).getTermsInOrder();

    return queryOptions.getIndexer().searchPhrase(
            queryOptions.getQueryId(),
            queryOptions.getRelevantSourceDocumentIDs(),
            phrase,
            orderedTerms,
            queryOptions.getLimit());
}
Use of de.catma.indexer.TermExtractor in the project catma by forTEXT.
Class GraphWorktreeProject, method insert.
/**
 * Adds a source document to this project.
 * <p>
 * The document content is tokenized, then the original temp file, the
 * converted UTF-8 content and the extracted terms are passed to
 * {@code gitProjectHandler.createSourceDocument}. Afterwards the document is
 * unloaded, given a fresh {@link StandardContentHandler} that carries the same
 * source document info, and stamped with the returned revision hash. Finally
 * the root revision hash is refreshed, the document is added through
 * {@code graphProjectHandler} and a {@code DocumentChangeEvent} is posted.
 * <p>
 * Any exception raised along the way is not rethrown: its stack trace is
 * printed and it is reported through {@code propertyChangeSupport} as a
 * {@code RepositoryChangeEvent.exceptionOccurred} property change.
 *
 * @param sourceDocument the document to add
 * @param deleteTempFile whether the temp file named after the document's UUID
 *                       in {@code tempDir} is deleted after the document was created
 * @throws IOException declared by the signature; failures inside the body are
 *                     caught and reported as described above
 */
@Override
public void insert(SourceDocument sourceDocument, boolean deleteTempFile) throws IOException {
    try {
        File uploadedTempFile = Paths.get(new File(this.tempDir).toURI())
                .resolve(sourceDocument.getUuid())
                .toFile();
        String utf8Filename = sourceDocument.getUuid() + "." + UTF8_CONVERSION_FILE_EXTENSION;

        logger.info("start tokenizing sourcedocument");
        List<String> unseparableSequences = sourceDocument.getSourceContentHandler()
                .getSourceDocumentInfo().getIndexInfoSet().getUnseparableCharacterSequences();
        List<Character> separatingCharacters = sourceDocument.getSourceContentHandler()
                .getSourceDocumentInfo().getIndexInfoSet().getUserDefinedSeparatingCharacters();
        Locale documentLocale = sourceDocument.getSourceContentHandler()
                .getSourceDocumentInfo().getIndexInfoSet().getLocale();
        Map<String, List<TermInfo>> termInfosByTerm = new TermExtractor(
                sourceDocument.getContent(),
                unseparableSequences,
                separatingCharacters,
                documentLocale).getTerms();
        logger.info("tokenization finished");

        try (FileInputStream originalInput = new FileInputStream(uploadedTempFile)) {
            MediaType mediaType = MediaType.parse(
                    sourceDocument.getSourceContentHandler().getSourceDocumentInfo().getTechInfoSet().getMimeType());

            // Fall back to a placeholder when the media type yields no usable type string.
            String baseType = mediaType.getBaseType().getType();
            String extension = (baseType != null && !baseType.isEmpty()) ? baseType : "unknown";

            String originalFilename = sourceDocument.getUuid() + ORIG_INFIX + "." + extension;
            ByteArrayInputStream convertedInput = new ByteArrayInputStream(
                    sourceDocument.getContent().getBytes(Charset.forName("UTF-8")));
            String tokenizedFilename = sourceDocument.getUuid() + "." + TOKENIZED_FILE_EXTENSION;

            String revisionHash = gitProjectHandler.createSourceDocument(
                    sourceDocument.getUuid(),
                    originalInput,
                    originalFilename,
                    convertedInput,
                    utf8Filename,
                    termInfosByTerm,
                    tokenizedFilename,
                    sourceDocument.getSourceContentHandler().getSourceDocumentInfo());

            // Drop the loaded content and swap in a standard handler that keeps the same document info.
            sourceDocument.unload();
            StandardContentHandler replacementHandler = new StandardContentHandler();
            replacementHandler.setSourceDocumentInfo(
                    sourceDocument.getSourceContentHandler().getSourceDocumentInfo());
            sourceDocument.setSourceContentHandler(replacementHandler);
            sourceDocument.setRevisionHash(revisionHash);
        }

        if (deleteTempFile) {
            uploadedTempFile.delete();
        }

        String previousRootRevisionHash = this.rootRevisionHash;
        this.rootRevisionHash = gitProjectHandler.getRootRevisionHash();
        graphProjectHandler.addSourceDocument(
                previousRootRevisionHash,
                this.rootRevisionHash,
                sourceDocument,
                getTokenizedSourceDocumentPath(sourceDocument.getUuid()));

        eventBus.post(new DocumentChangeEvent(sourceDocument, ChangeType.CREATED));
    } catch (Exception e) {
        e.printStackTrace();
        propertyChangeSupport.firePropertyChange(RepositoryChangeEvent.exceptionOccurred.name(), null, e);
    }
}
Use of de.catma.indexer.TermExtractor in the project catma by forTEXT.
Class GitProjectHandlerTest, method createSourceDocument.
// @Test
// public void delete() throws Exception {
// try (ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
//
// GitProjectManager gitProjectHandler = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
//
//
// String projectId = gitProjectHandler.create(
// "Test CATMA Project", "This is a test CATMA project"
// );
// // we don't add the projectId to this.projectsToDeleteOnTearDown as this is the delete test
//
// assertNotNull(projectId);
// assert projectId.startsWith("CATMA_");
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls
// // return
// assertFalse(jGitRepoManager.isAttached());
//
// String expectedRootRepositoryName = GitProjectManager.getProjectRootRepositoryName(projectId);
//
// File expectedRootRepositoryPath = new File(
// jGitRepoManager.getRepositoryBasePath(), expectedRootRepositoryName
// );
//
// assert expectedRootRepositoryPath.exists();
// assert expectedRootRepositoryPath.isDirectory();
//
// gitProjectHandler.delete(projectId);
//
// assertFalse(expectedRootRepositoryPath.exists());
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls
// // return
// assertFalse(jGitRepoManager.isAttached());
// }
// }
//
// @Test
// public void createTagset() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
//
// GitProjectManager gitProjectManager = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
//
//
// String projectId = gitProjectManager.create(
// "Test CATMA Project",
// "This is a test CATMA project"
// );
// this.projectsToDeleteOnTearDown.add(projectId);
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
//
// GitProjectHandler gitProjectHandler = new GitProjectHandler(null, projectId, jGitRepoManager, gitLabServerManager);
//
// String tagsetId = gitProjectHandler.createTagset(
//
// null,
// "Test Tagset",
// null
// );
//
// assertNotNull(tagsetId);
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
//
// jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
// Status status = jGitRepoManager.getGitApi().status().call();
// Set<String> added = status.getAdded();
//
// assert status.hasUncommittedChanges();
// assert added.contains(".gitmodules");
// assert added.contains(String.format("%s/%s", GitProjectHandler.TAGSET_SUBMODULES_DIRECTORY_NAME, tagsetId));
// }
// }
//
// @Test
// public void createMarkupCollection() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(CATMAPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
//
// GitProjectManager gitProjectManager = new GitProjectManager(
// CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(),
// UserIdentification.userToMap(this.catmaUser.getIdentifier()));
//
//
// String projectId = gitProjectManager.create(
// "Test CATMA Project",
// "This is a test CATMA project"
// );
// this.projectsToDeleteOnTearDown.add(projectId);
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
//
// GitProjectHandler gitProjectHandler = new GitProjectHandler(null, projectId, jGitRepoManager, gitLabServerManager);
//
// String markupCollectionId = gitProjectHandler.createMarkupCollection(
// null,
// "Test Markup Collection",
// null,
// "fakeSourceDocumentId",
// "fakeSourceDocumentVersion"
// );
//
// assertNotNull(markupCollectionId);
//
// // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
// assertFalse(jGitRepoManager.isAttached());
//
// jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
// Status status = jGitRepoManager.getGitApi().status().call();
// Set<String> added = status.getAdded();
//
// assert status.hasUncommittedChanges();
// assert added.contains(".gitmodules");
// assert added.contains(
// String.format(
// "%s/%s", GitProjectHandler.MARKUP_COLLECTION_SUBMODULES_DIRECTORY_NAME, markupCollectionId
// )
// );
// }
// }
/**
 * Creates a project and adds a source document to it through
 * {@code GitProjectHandler.createSourceDocument}, then checks that the project
 * root repository is clean and contains exactly one commit whose committer
 * identity matches the restricted GitLab user and whose message mentions the
 * document title.
 * <p>
 * All file streams are opened in try-with-resources blocks so they are closed
 * even when an assertion or a handler call fails.
 * <p>
 * NOTE(review): the plain {@code assert} statements below are only evaluated
 * when the JVM runs with assertions enabled ({@code -ea}).
 *
 * @throws Exception if the fixture files cannot be read or any repository operation fails
 */
@Test
public void createSourceDocument() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");

    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);

    // Tokenize from a dedicated, short-lived stream; the stream handed to the handler below must be
    // a fresh one, otherwise an empty file would be written (FileInputStream does not support `reset`).
    Map<String, List<TermInfo>> terms;
    try (FileInputStream tokenizerInputStream = new FileInputStream(convertedSourceDocument)) {
        terms = new TermExtractor(IOUtils.toString(tokenizerInputStream, techInfoSet.getCharset()), new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    }

    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";

    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            JGitRepoManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());

        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
                (projectId) -> {
                }, mockBackgroundService, mockEventBus);

        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us

        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());

        GitProjectHandler gitProjectHandler = new GitProjectHandler(gitlabManagerRestricted.getUser(), projectId, jGitRepoManager, gitlabManagerRestricted);
        // would usually happen when the project is opened via GraphWorktreeProject
        gitProjectHandler.loadRolesPerResource();

        String revisionHash = gitProjectHandler.createSourceDocument(sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);

        // the JGitRepoManager instance should always be in a detached state after GitProjectHandler calls return
        assertFalse(jGitRepoManager.isAttached());

        jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));
        Status status = jGitRepoManager.getGitApi().status().call();
        assert status.isClean();
        assertFalse(status.hasUncommittedChanges());

        Iterable<RevCommit> commits = jGitRepoManager.getGitApi().log().all().call();
        @SuppressWarnings("unchecked") List<RevCommit> commitsList = IteratorUtils.toList(commits.iterator());
        assertEquals(1, commitsList.size());

        // TODO: it would be good to check that the revision hash of the commit matches, however GitProjectHandler currently returns the revision hash
        // from the source document repo itself rather than from the root repo
        assertEquals(gitlabManagerRestricted.getUser().getIdentifier(), commitsList.get(0).getCommitterIdent().getName());
        assertEquals(gitlabManagerRestricted.getUser().getEmail(), commitsList.get(0).getCommitterIdent().getEmailAddress());
        assert commitsList.get(0).getFullMessage().contains(String.format("Added Document %s with ID", contentInfoSet.getTitle()));
        // TODO: add assertions for actual paths changed (see commented above - would need to be modified for already committed changes)
    }
}
Use of de.catma.indexer.TermExtractor in the project catma by forTEXT.
Class GitSourceDocumentHandlerTest, method update.
// // how to test for exceptions: https://stackoverflow.com/a/31826781
// @Rule
// public ExpectedException thrown = ExpectedException.none();
//
// @Test
// public void delete() throws Exception {
// try (ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
//
// thrown.expect(IOException.class);
// thrown.expectMessage("Not implemented");
// gitSourceDocumentHandler.delete("fakeProjectId", "fakeSourceDocumentId");
// }
// }
//
// @Test
// public void open() throws Exception {
// try (JGitRepoManager jGitRepoManager = new JGitRepoManager(this.catmaProperties.getProperty(RepositoryPropertyKey.GitBasedRepositoryBasePath.name()), this.catmaUser)) {
// this.directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());
//
// HashMap<String, Object> getJsonLdWebAnnotationResult = JsonLdWebAnnotationTest.getJsonLdWebAnnotation(
// jGitRepoManager, this.gitLabServerManager, this.catmaUser
// );
//
// String projectId = (String)getJsonLdWebAnnotationResult.get("projectUuid");
// String sourceDocumentId = (String)getJsonLdWebAnnotationResult.get("sourceDocumentUuid");
//
// this.projectsToDeleteOnTearDown.add(projectId);
//
// GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(
// jGitRepoManager, this.gitLabServerManager
// );
//
// SourceDocument loadedSourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentId);
//
// assertNotNull(loadedSourceDocument);
// assertEquals(
// "William Faulkner",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getAuthor()
// );
// assertEquals(
// "A Rose for Emily",
// loadedSourceDocument.getSourceContentHandler().getSourceDocumentInfo().getContentInfoSet()
// .getTitle()
// );
// assertNotNull(loadedSourceDocument.getRevisionHash());
// }
// }
/**
 * Creates a source document through {@code GitSourceDocumentHandler.create},
 * pushes it, adds it as a submodule of the project root repository, then
 * replaces the document's content info set, calls
 * {@code GitSourceDocumentHandler.update} and checks that the
 * {@code header.json} inside the submodule contains the updated metadata.
 * <p>
 * All file streams are opened in try-with-resources blocks so they are closed
 * even when an assertion or a handler call fails.
 *
 * @throws Exception if the fixture files cannot be read or any repository operation fails
 */
@Test
public void update() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");

    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);

    // Tokenize from a dedicated, short-lived stream; the stream handed to the handler below must be
    // a fresh one, otherwise an empty file would be written (FileInputStream does not support `reset`).
    Map<String, List<TermInfo>> terms;
    try (FileInputStream tokenizerInputStream = new FileInputStream(convertedSourceDocument)) {
        terms = new TermExtractor(IOUtils.toString(tokenizerInputStream, techInfoSet.getCharset()), new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    }

    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";

    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());

        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
                (projectId) -> {
                }, mockBackgroundService, mockEventBus);

        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us

        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());

        GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(jGitRepoManager, gitlabManagerRestricted, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String revisionHash = gitSourceDocumentHandler.create(projectId, sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);

        // the JGitRepoManager instance should always be in a detached state after GitSourceDocumentHandler calls return
        assertFalse(jGitRepoManager.isAttached());

        // TODO: factor out a function that does all of the above
        jGitRepoManager.open(projectId, sourceDocumentUuid);
        jGitRepoManager.push(new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String remoteUri = jGitRepoManager.getRemoteUrl(null);
        jGitRepoManager.detach();

        // open the project root repository
        jGitRepoManager.open(projectId, GitProjectManager.getProjectRootRepositoryName(projectId));

        // create the submodule
        File targetSubmodulePath = Paths.get(jGitRepoManager.getRepositoryWorkTree().getAbsolutePath(), SOURCE_DOCUMENT_SUBMODULES_DIRECTORY_NAME, sourceDocumentUuid).toFile();
        // submodule files and the changed .gitmodules file are automatically staged
        jGitRepoManager.addSubmodule(targetSubmodulePath, remoteUri, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        jGitRepoManager.detach();

        SourceDocument sourceDocument = gitSourceDocumentHandler.open(projectId, sourceDocumentUuid);
        sourceDocument.getSourceContentHandler().getSourceDocumentInfo().setContentInfoSet(new ContentInfoSet("William Faulkner (updated)", "Test description (new)", "Test publisher (new)", "A Rose for Emily (updated)"));

        String sourceDocumentRevision = gitSourceDocumentHandler.update(projectId, sourceDocument);
        assertNotNull(sourceDocumentRevision);

        String expectedSerializedSourceDocumentInfo = "" + "{\n" + " \"gitContentInfoSet\": {\n" + " \"author\": \"William Faulkner (updated)\",\n" + " \"description\": \"Test description (new)\",\n" + " \"publisher\": \"Test publisher (new)\",\n" + " \"title\": \"A Rose for Emily (updated)\"\n" + " },\n" + " \"gitIndexInfoSet\": {\n" + " \"locale\": \"en\",\n" + " \"unseparableCharacterSequences\": [],\n" + " \"userDefinedSeparatingCharacters\": []\n" + " },\n" + " \"gitTechInfoSet\": {\n" + " \"charset\": \"UTF-8\",\n" + " \"checksum\": 705211438,\n" + " \"fileName\": null,\n" + " \"fileOSType\": \"DOS\",\n" + " \"fileType\": \"TEXT\",\n" + " \"mimeType\": \"text/plain\",\n" + " \"uri\": null\n" + " }\n" + "}";
        assertEquals(expectedSerializedSourceDocumentInfo, FileUtils.readFileToString(new File(targetSubmodulePath, "header.json"), StandardCharsets.UTF_8));
    }
}
Use of de.catma.indexer.TermExtractor in the project catma by forTEXT.
Class GitSourceDocumentHandlerTest, method create.
/**
 * Creates a source document through {@code GitSourceDocumentHandler.create}
 * and checks the resulting local repository directory: it must exist, contain
 * byte-identical copies of the original and converted fixture files, and
 * contain a {@code header.json} whose content equals the expected serialized
 * source document info.
 * <p>
 * All file streams are opened in try-with-resources blocks so they are closed
 * even when an assertion or a handler call fails.
 * <p>
 * NOTE(review): the plain {@code assert} statements below are only evaluated
 * when the JVM runs with assertions enabled ({@code -ea}).
 *
 * @throws Exception if the fixture files cannot be read or any repository operation fails
 */
@Test
public void create() throws Exception {
    File originalSourceDocument = new File("testdocs/rose_for_emily.pdf");
    File convertedSourceDocument = new File("testdocs/rose_for_emily.txt");

    IndexInfoSet indexInfoSet = new IndexInfoSet();
    indexInfoSet.setLocale(Locale.ENGLISH);
    ContentInfoSet contentInfoSet = new ContentInfoSet("William Faulkner", "", "", "A Rose for Emily");
    TechInfoSet techInfoSet = new TechInfoSet(FileType.TEXT, StandardCharsets.UTF_8, FileOSType.DOS, 705211438L);
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(indexInfoSet, contentInfoSet, techInfoSet);

    // Tokenize from a dedicated, short-lived stream; the stream handed to the handler below must be
    // a fresh one, otherwise an empty file would be written (FileInputStream does not support `reset`).
    Map<String, List<TermInfo>> terms;
    try (FileInputStream tokenizerInputStream = new FileInputStream(convertedSourceDocument)) {
        terms = new TermExtractor(IOUtils.toString(tokenizerInputStream, techInfoSet.getCharset()), new ArrayList<>(), new ArrayList<>(), indexInfoSet.getLocale()).getTerms();
    }

    String sourceDocumentUuid = new IDGenerator().generateDocumentId();
    // GraphWorktreeProject.TOKENIZED_FILE_EXTENSION
    String tokenizedSourceDocumentFileName = sourceDocumentUuid + "." + "json";

    try (FileInputStream originalSourceDocumentStream = new FileInputStream(originalSourceDocument);
            FileInputStream convertedSourceDocumentStream = new FileInputStream(convertedSourceDocument);
            ILocalGitRepositoryManager jGitRepoManager = new JGitRepoManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted.getUser())) {
        directoriesToDeleteOnTearDown.add(jGitRepoManager.getRepositoryBasePath());

        BackgroundService mockBackgroundService = mock(BackgroundService.class);
        EventBus mockEventBus = mock(EventBus.class);
        GitProjectManager gitProjectManager = new GitProjectManager(CATMAPropertyKey.GitBasedRepositoryBasePath.getValue(), gitlabManagerRestricted, // noop deletion handler
                (projectId) -> {
                }, mockBackgroundService, mockEventBus);

        String projectId = gitProjectManager.create("Test CATMA Project", "This is a test CATMA project");
        // we don't add the projectId to projectsToDeleteOnTearDown as deletion of the user will take care of that for us

        // the JGitRepoManager instance should always be in a detached state after GitProjectManager calls return
        assertFalse(jGitRepoManager.isAttached());

        GitSourceDocumentHandler gitSourceDocumentHandler = new GitSourceDocumentHandler(jGitRepoManager, gitlabManagerRestricted, new UsernamePasswordCredentialsProvider("oauth2", gitlabManagerRestricted.getPassword()));
        String revisionHash = gitSourceDocumentHandler.create(projectId, sourceDocumentUuid, originalSourceDocumentStream, originalSourceDocument.getName(), convertedSourceDocumentStream, convertedSourceDocument.getName(), terms, tokenizedSourceDocumentFileName, sourceDocumentInfo);
        assertNotNull(revisionHash);

        // the JGitRepoManager instance should always be in a detached state after GitSourceDocumentHandler calls return
        assertFalse(jGitRepoManager.isAttached());

        File expectedRepoPath = Paths.get(jGitRepoManager.getRepositoryBasePath().getPath(), projectId, sourceDocumentUuid).toFile();
        assert expectedRepoPath.exists();
        assert expectedRepoPath.isDirectory();
        assert Arrays.asList(expectedRepoPath.list()).contains("rose_for_emily.pdf");
        assert Arrays.asList(expectedRepoPath.list()).contains("rose_for_emily.txt");
        assert FileUtils.contentEquals(originalSourceDocument, new File(expectedRepoPath, "rose_for_emily.pdf"));
        assert FileUtils.contentEquals(convertedSourceDocument, new File(expectedRepoPath, "rose_for_emily.txt"));
        assert Arrays.asList(expectedRepoPath.list()).contains("header.json");

        String expectedSerializedSourceDocumentInfo = "" + "{\n" + " \"gitContentInfoSet\": {\n" + " \"author\": \"William Faulkner\",\n" + " \"description\": \"\",\n" + " \"publisher\": \"\",\n" + " \"title\": \"A Rose for Emily\"\n" + " },\n" + " \"gitIndexInfoSet\": {\n" + " \"locale\": \"en\",\n" + " \"unseparableCharacterSequences\": [],\n" + " \"userDefinedSeparatingCharacters\": []\n" + " },\n" + " \"gitTechInfoSet\": {\n" + " \"charset\": \"UTF-8\",\n" + " \"checksum\": 705211438,\n" + " \"fileName\": null,\n" + " \"fileOSType\": \"DOS\",\n" + " \"fileType\": \"TEXT\",\n" + " \"mimeType\": \"text/plain\",\n" + " \"uri\": null\n" + " }\n" + "}";
        assertEquals(expectedSerializedSourceDocumentInfo, FileUtils.readFileToString(new File(expectedRepoPath, "header.json"), StandardCharsets.UTF_8));
    }
}
Aggregations