use of de.catma.document.source.SourceDocumentInfo in project catma by forTEXT.
the class InspectContentStep method updatePreview.
/**
 * Loads a preview of the uploaded file's content into {@code taPreview}.
 *
 * <p>For files whose MIME type equals {@code FileType.XML2}'s, the content is
 * obtained through an {@link XML2ContentHandler}; for everything else Tika's
 * {@code parseToString} is used with a length limit of 3000. A non-empty
 * preview gets the marker {@code " [...] "} appended. The right-to-left
 * preview style is switched on or off according to an {@link IndexInfoSet}
 * built from the upload's locale.
 *
 * <p>Any exception raised while loading the preview is logged and reported to
 * the user through an error notification; it is not rethrown.
 *
 * @param uploadFile the uploaded file to preview
 */
private void updatePreview(UploadFile uploadFile) {
    Metadata metadata = new Metadata();
    // MediaType.parse yields null for a missing or malformed MIME type string.
    // Guard against it here so the problem surfaces through the error handling
    // below instead of as an uncaught NullPointerException.
    MediaType type = MediaType.parse(uploadFile.getMimetype());
    if ((type != null) && type.getBaseType().toString().equals(FileType.TEXT.getMimeType())) {
        // For plain text, pass the charset of the upload on to the parser.
        metadata.set(Metadata.CONTENT_TYPE, new MediaType(type, uploadFile.getCharset()).toString());
    }

    try {
        String content = "";
        SourceDocumentInfo sourceDocumentInfo = new SourceDocumentInfo();
        IndexInfoSet indexInfoSet =
            new IndexInfoSet(Collections.emptyList(), Collections.emptyList(), uploadFile.getLocale());

        if (uploadFile.getMimetype().equals(FileType.XML2.getMimeType())) {
            XML2ContentHandler contentHandler = new XML2ContentHandler();
            TechInfoSet techInfoSet = new TechInfoSet(
                uploadFile.getOriginalFilename(), uploadFile.getMimetype(), uploadFile.getTempFilename());
            sourceDocumentInfo.setTechInfoSet(techInfoSet);
            contentHandler.setSourceDocumentInfo(sourceDocumentInfo);
            contentHandler.load();
            content = contentHandler.getContent();
        } else {
            // The Tika facade is only needed on this path, so it is created here.
            Tika tika = new Tika();
            try (FileInputStream fis = new FileInputStream(new File(uploadFile.getTempFilename()))) {
                content = tika.parseToString(fis, metadata, 3000);
            }
        }

        if (!content.isEmpty()) {
            content += " [...] ";
        }
        taPreview.setValue(content);

        if (indexInfoSet.isRightToLeftWriting()) {
            taPreview.addStyleName("document-wizard-rtl-preview");
        } else {
            taPreview.removeStyleName("document-wizard-rtl-preview");
        }
    } catch (Exception e) {
        Logger.getLogger(InspectContentStep.class.getName()).log(
            Level.SEVERE,
            String.format("Error loading preview of %1$s", uploadFile.getOriginalFilename()),
            e);

        // Normalize a missing or blank exception message to an empty string for display.
        String errorMsg = e.getMessage();
        if ((errorMsg == null) || (errorMsg.trim().isEmpty())) {
            errorMsg = "";
        }

        Notification.show(
            "Error",
            String.format(
                "Error loading content of %1$s! " + "Adding this file to your Project might fail!\n The underlying error message was:\n%2$s",
                uploadFile.getOriginalFilename(),
                errorMsg),
            Type.ERROR_MESSAGE);
    }
}
use of de.catma.document.source.SourceDocumentInfo in project catma by forTEXT.
the class TeiSourceDocumentInfoSerializationHandler method deserialize.
/**
 * Assembles a {@link SourceDocumentInfo} from the three info sets exposed by
 * the given TEI document.
 *
 * @param teiDocument the TEI document to read the info sets from
 * @return a new {@link SourceDocumentInfo} holding the document's index,
 *         content and tech info sets
 */
private SourceDocumentInfo deserialize(TeiDocument teiDocument) {
    // Read the info sets in the same order as before: content, tech, index.
    final ContentInfoSet contentInfo = teiDocument.getContentInfoSet();
    final TechInfoSet techInfo = teiDocument.getTechInfoset();
    final IndexInfoSet indexInfo = teiDocument.getIndexInfoSet();

    final SourceDocumentInfo result = new SourceDocumentInfo(indexInfo, contentInfo, techInfo);
    return result;
}
Aggregations