Search in sources :

Example 11 with LimitedContentWriter

use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.

the class PowerPointDocument method readContent.

/**
 * Extracts the text of a PowerPoint file and returns it as a {@link FileContent}.
 *
 * <p>The leaf's input stream is buffered, handed to {@code extractText} together with a
 * {@link LimitedContentWriter}, and the writer's string form becomes the content.
 * NOTE(review): the writer arguments (100000 and the factory's max file size) are presumably
 * an initial capacity and an upper bound - confirm against LimitedContentWriter.
 *
 * @param leaf the file to read
 * @return the extracted text wrapped in a {@code FileContent}
 * @throws IOException declared by the signature; failures inside the body are rethrown as
 *         {@code DocumentException}
 * @throws DocumentException if anything goes wrong while reading; the original exception is
 *         attached as the cause
 */
@Override
public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
    if (log.isDebug()) {
        log.debug("read PPT Content of leaf=" + leaf.getName());
    }
    try (BufferedInputStream in = new BufferedInputStream(leaf.getInputStream())) {
        LimitedContentWriter text = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
        extractText(in, text);
        String extracted = text.toString();
        return new FileContent(extracted);
    } catch (Exception e) {
        // Every failure is reported uniformly, with the file name and the root cause.
        throw new DocumentException("Can not read PPT Content. File=" + leaf.getName(), e);
    }
}
Also used : LimitedContentWriter(org.olat.core.util.io.LimitedContentWriter) BufferedInputStream(java.io.BufferedInputStream) IOException(java.io.IOException)

Example 12 with LimitedContentWriter

use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.

the class WordDocument method readContent.

/**
 * Extracts the text of a legacy (OLE2 / POIFS) Word file.
 *
 * <p>Walks the root entries of the POIFS file system and passes the file system to
 * {@code collectWordDocument} for every document entry named {@code "WordDocument"};
 * directory entries and all other streams are ignored.
 *
 * @param leaf the file to read
 * @return the collected text wrapped in a {@code FileContent}
 * @throws IOException declared by the signature; failures inside the body are rethrown as
 *         {@code DocumentException}
 * @throws DocumentException if the file cannot be read; carries the original message and the
 *         original exception as its cause
 */
@Override
protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
    // NOTE(review): the long-to-int cast truncates for leaves larger than Integer.MAX_VALUE;
    // presumably the first argument is only an initial capacity - confirm.
    LimitedContentWriter sb = new LimitedContentWriter((int) leaf.getSize(), FileDocumentFactory.getMaxFileSize());
    try (InputStream bis = new BufferedInputStream(leaf.getInputStream())) {
        POIFSFileSystem filesystem = new POIFSFileSystem(bis);
        Iterator<?> entries = filesystem.getRoot().getEntries();
        while (entries.hasNext()) {
            Entry entry = (Entry) entries.next();
            // Only the "WordDocument" stream is relevant; directory entries are skipped.
            if (entry instanceof DocumentEntry && "WordDocument".equals(entry.getName())) {
                collectWordDocument(leaf, filesystem, sb);
            }
        }
        return new FileContent(sb.toString());
    } catch (Exception e) {
        log.warn("could not read in word document: " + leaf + " please check, that this is not an docx/rtf/html file!");
        // Chain the cause so the original stack trace is not lost.
        throw new DocumentException(e.getMessage(), e);
    }
}
Also used : LimitedContentWriter(org.olat.core.util.io.LimitedContentWriter) Entry(org.apache.poi.poifs.filesystem.Entry) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) BufferedInputStream(java.io.BufferedInputStream) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) POIFSFileSystem(org.apache.poi.poifs.filesystem.POIFSFileSystem) DocumentEntry(org.apache.poi.poifs.filesystem.DocumentEntry) IOException(java.io.IOException) OldWordFileFormatException(org.apache.poi.hwpf.OldWordFileFormatException)

Example 13 with LimitedContentWriter

use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.

the class PdfBoxExtractor method extractTextFromPdf.

/**
 * Extracts title and text from a PDF file.
 *
 * <p>If the document is encrypted, decryption with an empty password is attempted. When that
 * fails, a warning is logged and a {@code FileContent} is returned whose title and text are
 * both the file name. Otherwise the title comes from {@code getTitle} and the text from a
 * {@code PDFTextStripper} writing into a {@link LimitedContentWriter}.
 *
 * <p>The document is closed before the input stream, and the stream is closed even if closing
 * the document throws.
 *
 * @param leaf the PDF file to read
 * @return the title and extracted text of the PDF
 * @throws IOException if loading, stripping or closing fails
 * @throws DocumentAccessException declared by the signature; not thrown directly in this body
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug()) {
        log.debug("readContent from pdf starts...");
    }
    try (BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream())) {
        PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (Exception e) {
                    // Best effort: fall back to the file name as the only content.
                    log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                    LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
                    writer.append(leaf.getName());
                    writer.close();
                    return new FileContent(leaf.getName(), writer.toString());
                }
            }
            String title = getTitle(document);
            if (log.isDebug()) {
                log.debug("readContent PDDocument loaded");
            }
            PDFTextStripper stripper = new PDFTextStripper();
            LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
            stripper.writeText(document, writer);
            writer.close();
            return new FileContent(title, writer.toString());
        } finally {
            // Runs before the try-with-resources closes the stream, keeping the original order.
            document.close();
        }
    }
}
Also used : LimitedContentWriter(org.olat.core.util.io.LimitedContentWriter) FileContent(org.olat.search.service.document.file.FileContent) BufferedInputStream(java.io.BufferedInputStream) PDDocument(org.apache.pdfbox.pdmodel.PDDocument) IOException(java.io.IOException) DocumentAccessException(org.olat.search.service.document.file.DocumentAccessException) PDFTextStripper(org.apache.pdfbox.util.PDFTextStripper)

Example 14 with LimitedContentWriter

use of org.olat.core.util.io.LimitedContentWriter in project OpenOLAT by OpenOLAT.

the class PowerPointOOXMLDocument method readContent.

/**
 * Extracts the text of a PowerPoint OOXML file.
 *
 * <p>Opens the leaf's base file as a zip archive, collects every entry whose name starts with
 * {@code SLIDE} and ends with {@code ".xml"}, sorts those names with
 * {@code PowerPointDocumentComparator}, and parses each one into a shared
 * {@link LimitedContentWriter} for as long as {@code writer.accept()} returns true.
 *
 * @param leaf the file to read; must be a {@code JavaIOItem}, otherwise the cast fails with a
 *        {@code ClassCastException}
 * @return the collected text wrapped in a {@code FileContent}
 * @throws IOException declared by the signature; failures inside the body are rethrown as
 *         {@code DocumentException}
 * @throws DocumentException if the archive cannot be read or parsed; non-document exceptions
 *         are wrapped with their message and attached as the cause
 */
@Override
public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
    File file = ((JavaIOItem) leaf).getBasefile();
    LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
    try (ZipFile archive = new ZipFile(file)) {
        List<String> contents = new ArrayList<>();
        for (Enumeration<? extends ZipEntry> entriesEnumeration = archive.entries(); entriesEnumeration.hasMoreElements(); ) {
            String name = entriesEnumeration.nextElement().getName();
            if (name.startsWith(SLIDE) && name.endsWith(".xml")) {
                contents.add(name);
            }
        }
        if (contents.size() > 1) {
            Collections.sort(contents, new PowerPointDocumentComparator());
        }
        for (String content : contents) {
            if (writer.accept()) {
                ZipEntry entry = archive.getEntry(content);
                // try-with-resources closes the entry stream even when parsing fails.
                try (InputStream zip = archive.getInputStream(entry)) {
                    OfficeDocumentHandler dh = new OfficeDocumentHandler(writer);
                    parse(new ShieldInputStream(zip), dh);
                }
            }
        }
    } catch (DocumentException e) {
        throw e;
    } catch (Exception e) {
        // Chain the cause so the original stack trace is not lost.
        throw new DocumentException(e.getMessage(), e);
    }
    return new FileContent(writer.toString());
}
Also used : JavaIOItem(org.olat.core.util.vfs.JavaIOItem) ShieldInputStream(org.olat.core.util.io.ShieldInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ArrayList(java.util.ArrayList) ShieldInputStream(org.olat.core.util.io.ShieldInputStream) IOException(java.io.IOException) LimitedContentWriter(org.olat.core.util.io.LimitedContentWriter) ZipFile(java.util.zip.ZipFile) File(java.io.File) ZipFile(java.util.zip.ZipFile)

Example 15 with LimitedContentWriter

use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.

the class ExcelOOXMLDocument method parseSheets.

/**
 * Extracts the text of all worksheets of an Excel OOXML file.
 *
 * <p>Streams through the zip entries of the leaf and parses every entry whose name starts with
 * {@code SHEET} and ends with {@code ".xml"} into a shared {@link LimitedContentWriter}, as
 * long as {@code writer.accept()} returns true. Remaining entries are still iterated but not
 * parsed once the writer stops accepting.
 *
 * @param sharedStrings the shared-strings table handed to each {@code OfficeDocumentHandler}
 * @param leaf the file to read
 * @return the text collected from the worksheets
 * @throws IOException declared by the signature; failures inside the body are rethrown as
 *         {@code DocumentException}
 * @throws DocumentException if the archive cannot be read or parsed; non-document exceptions
 *         are wrapped with their message and attached as the cause
 */
private String parseSheets(Map<String, String> sharedStrings, VFSLeaf leaf) throws IOException, DocumentException {
    try (InputStream stream = leaf.getInputStream();
        ZipInputStream zip = new ZipInputStream(stream)) {
        LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
        for (ZipEntry entry = zip.getNextEntry(); entry != null; entry = zip.getNextEntry()) {
            if (!writer.accept()) {
                continue;
            }
            String name = entry.getName();
            if (name.startsWith(SHEET) && name.endsWith(".xml")) {
                OfficeDocumentHandler dh = new OfficeDocumentHandler(writer, sharedStrings);
                // NOTE(review): ShieldInputStream presumably keeps the parser from closing
                // the shared zip stream between entries - confirm.
                parse(new ShieldInputStream(zip), dh);
            }
        }
        return writer.toString();
    } catch (DocumentException e) {
        throw e;
    } catch (Exception e) {
        // Chain the cause so the original stack trace is not lost.
        throw new DocumentException(e.getMessage(), e);
    }
}
Also used : LimitedContentWriter(org.olat.core.util.io.LimitedContentWriter) ZipInputStream(java.util.zip.ZipInputStream) ZipInputStream(java.util.zip.ZipInputStream) ShieldInputStream(org.olat.core.util.io.ShieldInputStream) InputStream(java.io.InputStream) ZipEntry(java.util.zip.ZipEntry) ShieldInputStream(org.olat.core.util.io.ShieldInputStream) IOException(java.io.IOException) SAXException(org.xml.sax.SAXException)

Aggregations

IOException (java.io.IOException)18 LimitedContentWriter (org.olat.core.util.io.LimitedContentWriter)18 BufferedInputStream (java.io.BufferedInputStream)8 InputStream (java.io.InputStream)8 ZipEntry (java.util.zip.ZipEntry)6 ShieldInputStream (org.olat.core.util.io.ShieldInputStream)6 File (java.io.File)4 ArrayList (java.util.ArrayList)4 ZipFile (java.util.zip.ZipFile)4 POIFSFileSystem (org.apache.poi.poifs.filesystem.POIFSFileSystem)4 JavaIOItem (org.olat.core.util.vfs.JavaIOItem)4 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 InputStreamReader (java.io.InputStreamReader)2 ZipInputStream (java.util.zip.ZipInputStream)2 PDDocument (org.apache.pdfbox.pdmodel.PDDocument)2 PDFTextStripper (org.apache.pdfbox.util.PDFTextStripper)2 HSSFCell (org.apache.poi.hssf.usermodel.HSSFCell)2 HSSFRow (org.apache.poi.hssf.usermodel.HSSFRow)2 HSSFSheet (org.apache.poi.hssf.usermodel.HSSFSheet)2