use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.
the class CorpusExporter method export.
/**
 * Writes the given corpus to {@code os} as a gzip-compressed tar archive.
 *
 * <p>For every source document of the corpus one archive entry with the document
 * content (encoded as UTF-8) is written, followed by one entry per annotation
 * collection referenced for that document. Collections are serialized with a
 * {@link TeiUserMarkupCollectionSerializationHandler}.
 *
 * <p>The archive streams, and with them {@code os}, are closed when this method
 * returns, whether it completes normally or with an exception.
 *
 * @param exportName name passed on to the entry name helpers
 * @param corpus the corpus whose documents and collections get exported
 * @param os the stream that receives the compressed archive
 * @throws IOException if writing the archive fails
 */
public void export(String exportName, Corpus corpus, OutputStream os) throws IOException {
    // try-with-resources instead of finish()/close() in a finally block: if the loop
    // fails while an entry is still open, finish() itself throws, which used to mask
    // the original exception and skip close(). Now the original exception stays the
    // primary one and both streams are closed in any case.
    try (OutputStream tarFileOs = new GZIPOutputStream(os);
            TarArchiveOutputStream taOut = new TarArchiveOutputStream(tarFileOs, "UTF-8")) {
        taOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        taOut.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_POSIX);

        for (SourceDocument sd : corpus.getSourceDocuments()) {
            TarArchiveEntry sdEntry = new TarArchiveEntry(getSourceDocEntryName(exportName, sd));
            byte[] sdContent = sd.getContent().getBytes(Charset.forName("UTF8"));
            // the entry size has to be set before the entry is put into the archive
            sdEntry.setSize(sdContent.length);
            taOut.putArchiveEntry(sdEntry);
            taOut.write(sdContent);
            taOut.closeArchiveEntry();

            for (AnnotationCollectionReference umcRef : corpus.getUserMarkupCollectionRefs(sd)) {
                AnnotationCollection umc = repo.getUserMarkupCollection(umcRef);
                TeiUserMarkupCollectionSerializationHandler handler =
                        new TeiUserMarkupCollectionSerializationHandler(repo.getTagManager(), false);

                // serialize into memory first, the tar entry needs its size up front
                ByteArrayOutputStream teiDocOut = new ByteArrayOutputStream();
                // reuse the collection loaded above instead of loading it a second time
                handler.serialize(umc, sd, teiDocOut);
                byte[] umcContent = teiDocOut.toByteArray();

                String umcEntryName = getUmcEntryName(exportName, umc, sd);
                TarArchiveEntry umcEntry = new TarArchiveEntry(umcEntryName);
                umcEntry.setSize(umcContent.length);
                taOut.putArchiveEntry(umcEntry);
                taOut.write(umcContent);
                taOut.closeArchiveEntry();
            }
        }

        // write the archive trailer explicitly on the success path
        taOut.finish();
    }
}
use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.
the class ProjectResourceExportApiRequestHandler method serializeProjectResources.
/**
 * Serializes the source documents of the project together with the tag
 * definitions and annotations of their annotation collections.
 *
 * <p>Annotations that fail to serialize with an {@link IOException} are logged
 * and left out of the result. Any other failure is logged and answered with a
 * fixed JSON error document.
 *
 * @return the serialized export, or a JSON error document if serialization failed
 */
private String serializeProjectResources() {
    try {
        Export export = new Export();

        for (SourceDocument sourceDocument : project.getSourceDocuments()) {
            ArrayList<AnnotationCollection> annotationCollections = new ArrayList<>();
            for (AnnotationCollectionReference annotationCollectionReference : sourceDocument.getUserMarkupCollectionRefs()) {
                annotationCollections.add(project.getUserMarkupCollection(annotationCollectionReference));
            }

            // gather the tag definitions and tag references of all collections of this document
            ArrayList<TagDefinition> tagDefinitions = new ArrayList<>();
            ArrayList<TagReference> tagReferences = new ArrayList<>();
            for (AnnotationCollection annotationCollection : annotationCollections) {
                for (TagsetDefinition tagsetDefinition : annotationCollection.getTagLibrary().getTagsetDefinitions()) {
                    tagsetDefinition.stream().forEach(tagDefinitions::add);
                }
                tagReferences.addAll(annotationCollection.getTagReferences());
            }

            String documentUrl = String.format(
                    "%s%s/doc/%s",
                    BASE_URL,
                    handlerPath.substring(1),
                    sourceDocument.getUuid().toLowerCase());

            ExportDocument exportDocument = new ExportDocument(
                    new PreApiSourceDocument(sourceDocument, documentUrl),
                    tagDefinitions.stream().map(PreApiTagDefinition::new).collect(Collectors.toList()),
                    tagReferences.stream().map((TagReference tagReference) -> {
                        try {
                            // a reference without a matching definition aborts the export via the
                            // outer catch, as before, but now with a message naming the missing id
                            return new PreApiAnnotation(
                                    tagReference,
                                    tagDefinitions.stream()
                                            .filter(td -> td.getUuid().equals(tagReference.getTagDefinitionId()))
                                            .findFirst()
                                            .orElseThrow(() -> new IllegalStateException(String.format(
                                                    "No TagDefinition with ID %s found for TagReference: %s",
                                                    tagReference.getTagDefinitionId(),
                                                    tagReference))),
                                    sourceDocument);
                        } catch (IOException e) {
                            logger.log(Level.WARNING, String.format("Error serializing TagReference: %s", tagReference), e);
                            return null;
                        }
                    })
                    // drop annotations that failed above so the export contains no null entries
                    .filter(annotation -> annotation != null)
                    .collect(Collectors.toList()));

            export.addExportDocument(exportDocument);
        }

        return new SerializationHelper<Export>().serialize(export);
    } catch (Exception e) {
        logger.log(Level.SEVERE, "Failed to serialize project resources", e);
        return "{\"error\": \"Failed to serialize project resources, please contact CATMA support\"}";
    }
}
use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.
the class ProjectView method addUploadFile.
/**
 * Adds one uploaded file to the project as a source document.
 *
 * <p>The content is loaded once to determine the file OS type and a CRC32
 * checksum, both of which are stored in the tech info set before the document
 * is inserted. An intrinsic markup collection attached to the upload is
 * imported afterwards, and, if a name pattern is given, a new collection is
 * created with every <code>{{Title}}</code> placeholder replaced by the title
 * of the upload.
 *
 * <p>An {@link IOException} is logged and shown to the user as a notification;
 * it is not propagated.
 *
 * @param uploadFile the upload to add
 * @param useApostropheAsSeparator passed on when the index info set is requested
 * @param collectionNamePattern name pattern for a new collection, no collection
 *        is created if this is {@code null} or empty
 */
private void addUploadFile(UploadFile uploadFile, boolean useApostropheAsSeparator, String collectionNamePattern) {
    SourceDocumentInfo docInfo = new SourceDocumentInfo(
            uploadFile.getIndexInfoSet(useApostropheAsSeparator),
            uploadFile.getContentInfoSet(),
            uploadFile.getTechInfoSet());

    // XML2 uploads get their dedicated handler, everything else goes through Tika
    SourceContentHandler handler;
    if (docInfo.getTechInfoSet().getMimeType().equals(FileType.XML2.getMimeType())) {
        handler = new XML2ContentHandler();
    } else {
        handler = new TikaContentHandler();
    }
    handler.setSourceDocumentInfo(docInfo);

    SourceDocument sourceDocument = new SourceDocument(uploadFile.getUuid(), handler);

    try {
        String text = sourceDocument.getContent();

        FileOSType osType = FileOSType.getFileOSType(text);
        docInfo.getTechInfoSet().setFileOSType(osType);

        // NOTE(review): getBytes() without a charset uses the platform default, so the
        // checksum may differ between platforms; confirm before changing it
        CRC32 crc = new CRC32();
        crc.update(text.getBytes());
        docInfo.getTechInfoSet().setChecksum(crc.getValue());

        project.insert(sourceDocument);

        AnnotationCollection intrinsicCollection = uploadFile.getIntrinsicMarkupCollection();
        if (intrinsicCollection != null) {
            project.importCollection(Collections.emptyList(), intrinsicCollection);
        }

        boolean hasNamePattern = collectionNamePattern != null && !collectionNamePattern.isEmpty();
        if (hasNamePattern) {
            project.createUserMarkupCollection(
                    collectionNamePattern.replace("{{Title}}", uploadFile.getTitle()),
                    sourceDocument);
        }
    } catch (IOException e) {
        Logger log = Logger.getLogger(ProjectView.class.getName());
        String logMessage = String.format("Error loading content of %1$s", uploadFile.getTempFilename().toString());
        log.log(Level.SEVERE, logMessage, e);

        // a missing or blank exception message is shown as an empty string
        String rawMessage = e.getMessage();
        String errorMsg = (rawMessage != null && !rawMessage.trim().isEmpty()) ? rawMessage : "";

        String userMessage = String.format(
                "Error loading content of %1$s! " + "This document will be skipped!\n The underlying error message was:\n%2$s",
                uploadFile.getTitle(),
                errorMsg);
        Notification.show("Error", userMessage, Type.ERROR_MESSAGE);
    }
}
use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.
the class ImportIntrinsicMarkupStep method enter.
/**
 * Inspects the uploaded XML files for intrinsic markup when this step is entered.
 *
 * <p>Nothing happens when the step is entered by going back. Otherwise the
 * content panel is disabled, an indeterminate progress bar is shown and a
 * background job deserializes each upload with the XML2 mime type into an
 * annotation collection that is attached to the upload. When the job is done
 * the file list and the tagset import list are rebuilt and the step change
 * listener, if any, is notified. On failure the error is logged and shown to
 * the user. In both cases the busy state of the UI is reset.
 *
 * @param back {@code true} if the step is entered by navigating backwards
 */
@Override
public void enter(boolean back) {
    if (back) {
        return;
    }

    // busy state, reset again in done() and error()
    contentPanel.setEnabled(false);
    progressBar.setVisible(true);
    progressBar.setIndeterminate(true);

    // only XML2 uploads are inspected
    @SuppressWarnings("unchecked")
    final ArrayList<UploadFile> files = new ArrayList<UploadFile>(
            ((Collection<UploadFile>) wizardContext.get(DocumentWizard.WizardContextKey.UPLOAD_FILE_LIST))
                    .stream()
                    .filter(uploadFile -> uploadFile.getMimetype().equals(FileType.XML2.getMimeType()))
                    .collect(Collectors.toList()));

    // scratch tag manager, handed to the deserialization handler and read back in done()
    final TagManager tagmanager = new TagManager(new TagLibrary());

    BackgroundServiceProvider backgroundServiceProvider = (BackgroundServiceProvider) UI.getCurrent();
    backgroundServiceProvider.submit("inspecting-intrinsic-markup", new DefaultProgressCallable<List<UploadFile>>() {
        @Override
        public List<UploadFile> call() throws Exception {
            IDGenerator idGenerator = new IDGenerator();

            for (UploadFile uploadFile : files) {
                XML2ContentHandler contentHandler = new XML2ContentHandler();
                SourceDocument doc = new SourceDocument(uploadFile.getUuid(), contentHandler);

                SourceDocumentInfo documentInfo = new SourceDocumentInfo();
                TechInfoSet techInfoSet = new TechInfoSet();
                techInfoSet.setURI(uploadFile.getTempFilename());
                documentInfo.setTechInfoSet(techInfoSet);
                contentHandler.setSourceDocumentInfo(documentInfo);

                XmlMarkupCollectionSerializationHandler handler =
                        new XmlMarkupCollectionSerializationHandler(tagmanager, contentHandler, project.getUser().getIdentifier());

                try (FileInputStream fis = new FileInputStream(new File(uploadFile.getTempFilename()))) {
                    AnnotationCollection collection = handler.deserialize(doc, idGenerator.generateCollectionId(), fis);
                    uploadFile.setIntrinsicMarkupCollection(collection);
                }
            }

            return files;
        }
    }, new ExecutionListener<List<UploadFile>>() {
        @Override
        public void done(List<UploadFile> result) {
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);

            fileList.clear();
            fileList.addAll(result);
            fileDataProvider.refreshAll();

            tagsetImportList.clear();
            String defaultIntrinsicXMLElmentsName = "Default Intrinsic XML Elements";

            for (TagsetDefinition tagset : tagmanager.getTagLibrary()) {
                if (!tagset.isEmpty()) {
                    // a tagset with the same ID in the project is merged into, otherwise it is created
                    TagsetDefinition targetTagset =
                            project.getTagManager().getTagLibrary().getTagsetDefinition(tagset.getUuid());
                    boolean inProject = false;
                    if (targetTagset == null) {
                        targetTagset = tagset;
                    } else {
                        inProject = true;
                    }

                    // the namespace has to be derived before an unnamed tagset gets its default name
                    String namespace = tagset.getName() == null ? "none" : tagset.getName();
                    if (tagset.getName() == null) {
                        tagset.setName(defaultIntrinsicXMLElmentsName);
                    }

                    TagsetImport tagsetImport = new TagsetImport(
                            namespace,
                            tagset,
                            targetTagset,
                            inProject ? TagsetImportState.WILL_BE_MERGED : TagsetImportState.WILL_BE_CREATED);
                    tagsetImportList.add(tagsetImport);
                }
            }

            tagsetDataProvider.refreshAll();
            wizardContext.put(DocumentWizard.WizardContextKey.TAGSET_IMPORT_LIST, tagsetImportList);

            if (stepChangeListener != null) {
                stepChangeListener.stepChanged(ImportIntrinsicMarkupStep.this);
            }
        }

        @Override
        public void error(Throwable t) {
            // leave the busy state on failure as well, otherwise the panel stays
            // disabled and the indeterminate progress bar keeps running
            contentPanel.setEnabled(true);
            progressBar.setVisible(false);
            progressBar.setIndeterminate(false);

            Logger.getLogger(ImportIntrinsicMarkupStep.class.getName()).log(Level.SEVERE, "Error inspecting files", t);

            String errorMsg = t.getMessage();
            if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
                errorMsg = "";
            }

            Notification.show(
                    "Error",
                    String.format("Error inspecting the contents! " + "\n The underlying error message was:\n%1$s", errorMsg),
                    Type.ERROR_MESSAGE);
        }
    });
}
use of de.catma.document.annotation.AnnotationCollection in project catma by forTEXT.
the class GraphWorktreeProject method loadAnnotationCollection.
/**
 * Reads an annotation collection for the given document from a stream.
 *
 * <p>The tag library is deserialized from the stream first. For each of its
 * tagsets an import status records whether the tagset ID is among the resource
 * IDs reported by the git project handler and, if so, whether the tagset is
 * also present in the tag library of this project. The collection itself is
 * then built under a freshly generated ID.
 *
 * @param inputStream the stream to read the collection from
 * @param document the document the collection belongs to
 * @return the loaded collection paired with the import status of each of its tagsets
 * @throws IOException declared for failures while reading the stream
 */
public Pair<AnnotationCollection, List<TagsetDefinitionImportStatus>> loadAnnotationCollection(InputStream inputStream, SourceDocument document) throws IOException {
    // a scratch tag manager keeps the imported tagsets apart from the project's own
    TagManager importTagManager = new TagManager(new TagLibrary());
    TeiTagLibrarySerializationHandler libraryHandler = new TeiTagLibrarySerializationHandler(importTagManager);
    TagLibrary importedTagLibrary = libraryHandler.deserialize(null, inputStream);

    List<String> knownResourceIds = gitProjectHandler.getResourceIds();
    List<TagsetDefinitionImportStatus> importStatuses = new ArrayList<>();

    for (TagsetDefinition importedTagset : importedTagLibrary) {
        String tagsetId = importedTagset.getUuid();
        boolean inProjectHistory = knownResourceIds.contains(tagsetId);

        // only tagsets from the project history are looked up in the current tag library
        boolean current = false;
        if (inProjectHistory) {
            current = getTagManager().getTagLibrary().getTagsetDefinition(importedTagset.getUuid()) != null;
        }

        importStatuses.add(new TagsetDefinitionImportStatus(importedTagset, inProjectHistory, current));
    }

    String newCollectionId = idGenerator.generate();

    TeiUserMarkupCollectionDeserializer collectionDeserializer = new TeiUserMarkupCollectionDeserializer(
            libraryHandler.getTeiDocument(),
            importTagManager.getTagLibrary(),
            newCollectionId);

    AnnotationCollection loadedCollection = new AnnotationCollection(
            newCollectionId,
            libraryHandler.getTeiDocument().getContentInfoSet(),
            importTagManager.getTagLibrary(),
            collectionDeserializer.getTagReferences(),
            document.getUuid(),
            document.getRevisionHash());

    return new Pair<>(loadedCollection, importStatuses);
}
Aggregations