use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
the class PowerPointDocument method readContent.
/**
 * Reads the textual content of a PowerPoint file.
 * <p>
 * The leaf's input stream is buffered and passed to {@code extractText}, which writes
 * into a {@code LimitedContentWriter} whose limit is
 * {@code FileDocumentFactory.getMaxFileSize()}.
 *
 * @param leaf the file to read
 * @return the extracted text wrapped in a {@code FileContent}
 * @throws IOException declared by the signature; failures inside the try block are
 *         rethrown as {@code DocumentException}
 * @throws DocumentException if any exception occurs while reading; the original
 *         exception is attached as the cause
 */
@Override
public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
    if (log.isDebug()) {
        log.debug("read PPT Content of leaf=" + leaf.getName());
    }

    try (BufferedInputStream input = new BufferedInputStream(leaf.getInputStream())) {
        LimitedContentWriter textSink = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
        extractText(input, textSink);
        String extracted = textSink.toString();
        return new FileContent(extracted);
    } catch (Exception cause) {
        // Every failure is reported uniformly, with the file name in the message.
        throw new DocumentException("Can not read PPT Content. File=" + leaf.getName(), cause);
    }
}
use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
the class WordDocument method readContent.
/**
 * Reads the textual content of a binary Word file stored as a POIFS container.
 * <p>
 * Iterates over the root entries of the POIFS file system and passes the container to
 * {@code collectWordDocument} for every document entry named {@code "WordDocument"}.
 * Directory entries and all other document entries are skipped.
 *
 * @param leaf the file to read
 * @return the collected text wrapped in a {@code FileContent}
 * @throws IOException declared by the signature; failures inside the try block are
 *         rethrown as {@code DocumentException}
 * @throws DocumentException if any exception occurs while reading; it carries the
 *         original message and the original exception as its cause
 */
@Override
protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
    // A plain (int) cast of a long size can wrap to a negative number for very large files.
    // Bound the initial capacity by the writer's own limit and by zero before narrowing.
    int maxSize = FileDocumentFactory.getMaxFileSize();
    int initialCapacity = (int) Math.max(0L, Math.min(leaf.getSize(), (long) maxSize));
    LimitedContentWriter sb = new LimitedContentWriter(initialCapacity, maxSize);

    try (InputStream bis = new BufferedInputStream(leaf.getInputStream())) {
        POIFSFileSystem filesystem = new POIFSFileSystem(bis);
        Iterator<?> entries = filesystem.getRoot().getEntries();
        while (entries.hasNext()) {
            Entry entry = (Entry) entries.next();
            // Only the main document stream is of interest; directory entries are skipped.
            if (entry instanceof DocumentEntry && "WordDocument".equals(entry.getName())) {
                collectWordDocument(leaf, filesystem, sb);
            }
        }
        return new FileContent(sb.toString());
    } catch (Exception e) {
        log.warn("could not read in word document: " + leaf + " please check, that this is not an docx/rtf/html file!");
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new DocumentException(e.getMessage(), e);
    }
}
use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
the class PdfBoxExtractor method extractTextFromPdf.
/**
 * Extracts the title and the text of a PDF file.
 * <p>
 * If the document is encrypted, decryption with an empty password is attempted. When
 * that fails, a warning is logged and a {@code FileContent} is returned that uses the
 * file name as both title and content.
 *
 * @param leaf the PDF file to read
 * @return the title (from {@code getTitle}) and the stripped text, or the file name for
 *         both if the document cannot be decrypted
 * @throws IOException if loading, text stripping or closing fails
 * @throws DocumentAccessException declared by the signature; not raised directly by the
 *         statements of this method
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }

    // try-with-resources guarantees the stream is closed even if closing the document throws.
    try (BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream())) {
        PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (Exception e) {
                    log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                    // Fall back to the file name so the result still has some content.
                    LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
                    writer.append(leaf.getName());
                    writer.close();
                    return new FileContent(leaf.getName(), writer.toString());
                }
            }

            String title = getTitle(document);
            if (log.isDebug()) {
                log.debug("readContent PDDocument loaded");
            }

            PDFTextStripper stripper = new PDFTextStripper();
            LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
            stripper.writeText(document, writer);
            writer.close();
            return new FileContent(title, writer.toString());
        } finally {
            // The document is closed before the underlying stream, as before.
            document.close();
        }
    }
}
use of org.olat.core.util.io.LimitedContentWriter in project OpenOLAT by OpenOLAT.
the class PowerPointOOXMLDocument method readContent.
@Override
public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
File file = ((JavaIOItem) leaf).getBasefile();
LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
try (ZipFile wordFile = new ZipFile(file)) {
List<String> contents = new ArrayList<>();
for (Enumeration<? extends ZipEntry> entriesEnumeration = wordFile.entries(); entriesEnumeration.hasMoreElements(); ) {
ZipEntry entry = entriesEnumeration.nextElement();
String name = entry.getName();
if (name.startsWith(SLIDE) && name.endsWith(".xml")) {
contents.add(name);
}
}
if (contents.size() > 1) {
Collections.sort(contents, new PowerPointDocumentComparator());
}
for (String content : contents) {
if (writer.accept()) {
ZipEntry entry = wordFile.getEntry(content);
InputStream zip = wordFile.getInputStream(entry);
OfficeDocumentHandler dh = new OfficeDocumentHandler(writer);
parse(new ShieldInputStream(zip), dh);
zip.close();
}
}
} catch (DocumentException e) {
throw e;
} catch (Exception e) {
throw new DocumentException(e.getMessage());
}
return new FileContent(writer.toString());
}
use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
the class ExcelOOXMLDocument method parseSheets.
/**
 * Extracts the text of all sheet entries of an Excel OOXML file.
 * <p>
 * The leaf is read as a zip stream. Every entry whose name starts with {@code SHEET} and
 * ends with {@code ".xml"} is parsed with an {@code OfficeDocumentHandler} that writes
 * into a shared {@code LimitedContentWriter}. An entry is only parsed while
 * {@code writer.accept()} returns {@code true}.
 *
 * @param sharedStrings passed to each {@code OfficeDocumentHandler}
 * @param leaf the file to read
 * @return the text collected from all parsed sheets
 * @throws IOException declared by the signature; failures inside the try block are
 *         rethrown as {@code DocumentException}
 * @throws DocumentException rethrown unchanged if raised while parsing; any other
 *         exception is wrapped with its message and the original exception as cause
 */
private String parseSheets(Map<String, String> sharedStrings, VFSLeaf leaf) throws IOException, DocumentException {
    try (InputStream stream = leaf.getInputStream();
            ZipInputStream zip = new ZipInputStream(stream)) {
        LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());

        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
            if (!writer.accept()) {
                continue;
            }
            String name = entry.getName();
            if (name.startsWith(SHEET) && name.endsWith(".xml")) {
                OfficeDocumentHandler dh = new OfficeDocumentHandler(writer, sharedStrings);
                // NOTE(review): ShieldInputStream presumably keeps the parser from closing
                // the shared zip stream between entries; confirm against its implementation.
                parse(new ShieldInputStream(zip), dh);
            }
        }
        return writer.toString();
    } catch (DocumentException e) {
        throw e;
    } catch (Exception e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new DocumentException(e.getMessage(), e);
    }
}
Aggregations