use of de.catma.document.source.contenthandler.SourceContentHandler in project catma by forTEXT.
the class CorpusExporter method getFilename.
/**
 * Builds the file name used for the given source document.
 *
 * <p>When {@code simpleEntryStyle} (a flag defined outside this method) is set, the name is the
 * document's {@code toString()} value. Otherwise it is the document UUID, followed by
 * {@code "_"} and the title when the title is neither {@code null} nor empty.
 *
 * @param sourceDocument the document to build a file name for
 * @param withFileExtension whether {@code ".txt"} is appended to the name
 * @return the file name
 */
private String getFilename(SourceDocument sourceDocument, boolean withFileExtension) {
    // The title is looked up before the style check so that a broken handler chain
    // fails in the same way regardless of the entry style.
    String documentTitle = sourceDocument.getSourceContentHandler()
            .getSourceDocumentInfo()
            .getContentInfoSet()
            .getTitle();
    String extension = withFileExtension ? ".txt" : "";

    if (simpleEntryStyle) {
        return sourceDocument.toString() + extension;
    }

    String titleSuffix = "";
    if (documentTitle != null && !documentTitle.isEmpty()) {
        titleSuffix = "_" + documentTitle;
    }
    return sourceDocument.getUuid() + titleSuffix + extension;
}
use of de.catma.document.source.contenthandler.SourceContentHandler in project catma by forTEXT.
the class SourceDocumentExportOptionsDialog method getFilenameUTF8.
/**
 * Derives a {@code .txt} file name for the given source document.
 *
 * <p>The name is the document title with every whitespace character replaced by an underscore.
 * If the title is {@code null} or empty, the document UUID with all {@code '/'} and {@code ':'}
 * characters removed is used instead.
 *
 * @param sourceDocument the document to derive a file name for
 * @return the file name, always ending in {@code ".txt"}
 */
private String getFilenameUTF8(SourceDocument sourceDocument) {
    String rawTitle = sourceDocument.getSourceContentHandler()
            .getSourceDocumentInfo()
            .getContentInfoSet()
            .getTitle();

    if (rawTitle != null) {
        String normalizedTitle = rawTitle.replaceAll("\\s", "_"); //$NON-NLS-1$ //$NON-NLS-2$
        if (!normalizedTitle.isEmpty()) {
            return normalizedTitle + ".txt"; //$NON-NLS-1$
        }
    }

    // No usable title: fall back to the UUID, stripped of '/' and ':'.
    String sanitizedUuid = sourceDocument.getUuid().replaceAll("[/:]", ""); //$NON-NLS-1$ //$NON-NLS-2$
    return sanitizedUuid + ".txt"; //$NON-NLS-1$
}
use of de.catma.document.source.contenthandler.SourceContentHandler in project catma by forTEXT.
the class ProjectView method addUploadFile.
/**
 * Turns an uploaded file into a {@link SourceDocument} and adds it to the project.
 *
 * <p>The content handler is an {@code XML2ContentHandler} when the file's MIME type equals the
 * MIME type of {@code FileType.XML2}, and a {@code TikaContentHandler} otherwise. After the
 * content has been loaded, the detected {@code FileOSType} and a CRC32 checksum of the content
 * are stored in the document's tech info set and the document is inserted into the project.
 * An intrinsic markup collection, if the upload has one, is imported afterwards, and a user
 * markup collection is created when a collection name pattern is given.
 *
 * <p>An {@link IOException} does not propagate: it is logged and reported to the user through
 * a notification.
 *
 * @param uploadFile the uploaded file to add
 * @param useApostropheAsSeparator passed on to {@code uploadFile.getIndexInfoSet(...)}
 * @param collectionNamePattern name pattern for the collection to create; every occurrence of
 *        <code>{{Title}}</code> is replaced by the upload's title; {@code null} or empty
 *        means no collection is created
 */
private void addUploadFile(UploadFile uploadFile, boolean useApostropheAsSeparator, String collectionNamePattern) {
    SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo(
            uploadFile.getIndexInfoSet(useApostropheAsSeparator),
            uploadFile.getContentInfoSet(),
            uploadFile.getTechInfoSet());

    SourceContentHandler contentHandler =
            sourceDocumentInfo.getTechInfoSet().getMimeType().equals(FileType.XML2.getMimeType())
                    ? new XML2ContentHandler()
                    : new TikaContentHandler();
    contentHandler.setSourceDocumentInfo(sourceDocumentInfo);

    SourceDocument document = new SourceDocument(uploadFile.getUuid(), contentHandler);

    try {
        String content = document.getContent();

        FileOSType fileOSType = FileOSType.getFileOSType(content);
        sourceDocumentInfo.getTechInfoSet().setFileOSType(fileOSType);

        // Encode with an explicit charset: the no-arg getBytes() uses the platform default,
        // which would make the checksum of identical content differ between machines.
        CRC32 checksum = new CRC32();
        checksum.update(content.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        sourceDocumentInfo.getTechInfoSet().setChecksum(checksum.getValue());

        project.insert(document);

        AnnotationCollection intrinsicMarkupCollection = uploadFile.getIntrinsicMarkupCollection();
        if (intrinsicMarkupCollection != null) {
            project.importCollection(Collections.emptyList(), intrinsicMarkupCollection);
        }

        if (collectionNamePattern != null && !collectionNamePattern.isEmpty()) {
            String collectionName = collectionNamePattern.replace("{{Title}}", uploadFile.getTitle());
            project.createUserMarkupCollection(collectionName, document);
        }
    } catch (IOException e) {
        Logger.getLogger(ProjectView.class.getName()).log(
                Level.SEVERE,
                String.format("Error loading content of %1$s", uploadFile.getTempFilename().toString()),
                e);

        // Missing or whitespace-only messages are shown as an empty string.
        String errorMsg = e.getMessage();
        if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
            errorMsg = "";
        }

        Notification.show(
                "Error",
                String.format(
                        "Error loading content of %1$s! " + "This document will be skipped!\n The underlying error message was:\n%2$s",
                        uploadFile.getTitle(),
                        errorMsg),
                Type.ERROR_MESSAGE);
    }
}
use of de.catma.document.source.contenthandler.SourceContentHandler in project catma by forTEXT.
the class SourceDocumentHandler method loadSourceDocument.
/**
 * Constructs a Source Document.
 *
 * <p>The content handler is instantiated from the class registered in {@code typeHandlerMap}
 * for the document's file type and is given the supplied meta data.
 *
 * @param id the identifier of the source document
 * @param sourceDocumentInfo the meta data of the source document
 * @return the source document instance
 * @throws IOException access failure
 * @throws InstantiationException {@link SourceContentHandler} instantiation failure
 * @throws IllegalAccessException {@link SourceContentHandler} instantiation failure
 * @throws IllegalStateException if the file type is unknown or no content handler is
 *         registered for it
 */
public SourceDocument loadSourceDocument(String id, SourceDocumentInfo sourceDocumentInfo) throws IOException, InstantiationException, IllegalAccessException {
    FileType fileType = sourceDocumentInfo.getTechInfoSet().getFileType();
    if (fileType == null) {
        throw new IllegalStateException("I don't know the type of this file!");
    }

    // Fail with a descriptive error instead of a bare NullPointerException when the
    // file type has no registered handler.
    Class<? extends SourceContentHandler> handlerClass = typeHandlerMap.get(fileType);
    if (handlerClass == null) {
        throw new IllegalStateException("No content handler registered for file type " + fileType + "!");
    }

    SourceContentHandler handler = handlerClass.newInstance();
    handler.setSourceDocumentInfo(sourceDocumentInfo);

    return new SourceDocument(id, handler);
}
use of de.catma.document.source.contenthandler.SourceContentHandler in project catma by forTEXT.
the class SourceDocumentExportOptionsDialog method getFilename.
/**
 * Derives a file name for the given source document, using the lower-cased name of the
 * document's file type as the extension.
 *
 * <p>The base name is the document title with every whitespace character replaced by an
 * underscore. If the title is {@code null} or empty, the document UUID with all {@code '/'}
 * and {@code ':'} characters removed is used instead.
 *
 * @param sourceDocument the document to derive a file name for
 * @return the file name in the form {@code <base name>.<file type>}
 */
private String getFilename(SourceDocument sourceDocument) {
    SourceContentHandler sourceContentHandler = sourceDocument.getSourceContentHandler();

    String title = sourceContentHandler.getSourceDocumentInfo().getContentInfoSet().getTitle();
    if (title != null) {
        title = title.replaceAll("\\s", "_"); //$NON-NLS-1$ //$NON-NLS-2$
    }

    String baseName = ((title == null) || title.isEmpty())
            ? sourceDocument.getUuid().replaceAll("[/:]", "") //$NON-NLS-1$ //$NON-NLS-2$
            : title;

    // Lower-case with Locale.ROOT: the no-arg toLowerCase() follows the default locale and
    // would, for example, turn 'I' into a dotless 'ı' under a Turkish locale.
    String extension = sourceContentHandler.getSourceDocumentInfo()
            .getTechInfoSet()
            .getFileType()
            .name()
            .toLowerCase(java.util.Locale.ROOT);

    return baseName + "." + extension; //$NON-NLS-1$
}
Aggregations