Use of org.olat.core.util.io.LimitedContentWriter in project OpenOLAT by OpenOLAT.
The class WordOOXMLDocument, method readContent.
/**
 * Extracts the text of a Word OOXML (.docx) file for indexing.
 * <p>
 * The file is opened as a ZIP archive. The main document part
 * ({@code word/document.xml}) plus every header and footer XML part
 * (names starting with {@code HEADER} / {@code FOOTER} and ending in
 * {@code .xml}) are collected, ordered with {@link WordDocumentComparator}
 * and parsed one after the other into a single size-limited writer.
 *
 * @param leaf the file to read; must be a {@link JavaIOItem} because the
 *             underlying {@link File} is required to open the archive
 * @return the extracted text wrapped in a {@link FileContent}
 * @throws IOException declared by the overridden method; I/O failures raised
 *             inside the archive handling are wrapped in a {@link DocumentException}
 * @throws DocumentException if the archive cannot be opened or a part cannot be parsed
 */
@Override
public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
	File file = ((JavaIOItem) leaf).getBasefile();
	LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
	try (ZipFile wordFile = new ZipFile(file)) {
		// Collect the names of the parts that carry user-visible text.
		List<String> contents = new ArrayList<>();
		for (Enumeration<? extends ZipEntry> entriesEnumeration = wordFile.entries(); entriesEnumeration.hasMoreElements(); ) {
			String name = entriesEnumeration.nextElement().getName();
			boolean headerOrFooter = name.endsWith(".xml")
					&& (name.startsWith(HEADER) || name.startsWith(FOOTER));
			if (name.endsWith("word/document.xml") || headerOrFooter) {
				contents.add(name);
			}
		}

		if (contents.size() > 1) {
			Collections.sort(contents, new WordDocumentComparator());
		}

		for (String content : contents) {
			// The writer is asked before every part whether it still accepts content.
			if (writer.accept()) {
				ZipEntry entry = wordFile.getEntry(content);
				// try-with-resources: the entry stream is released even when parsing fails.
				try (InputStream zip = wordFile.getInputStream(entry)) {
					OfficeDocumentHandler dh = new OfficeDocumentHandler(writer);
					// NOTE(review): ShieldInputStream presumably keeps the parser from
					// closing the underlying stream - confirm against its implementation.
					parse(new ShieldInputStream(zip), dh);
				}
			}
		}
	} catch (DocumentException e) {
		throw e;
	} catch (Exception e) {
		// Keep the original exception as cause so the stack trace is not lost.
		throw new DocumentException(e.getMessage(), e);
	}
	return new FileContent(writer.toString());
}
Use of org.olat.core.util.io.LimitedContentWriter in project OpenOLAT by OpenOLAT.
The class PdfBoxExtractor, method extractTextFromPdf.
/**
 * Extracts title and text of a PDF file.
 * <p>
 * If the document is encrypted, a decryption with the empty password is
 * attempted. When that fails, the file name is used both as title and as
 * content instead of raising an error.
 *
 * @param leaf the PDF file to read
 * @return the title and the extracted text (or the file name as fallback
 *         for documents that cannot be decrypted)
 * @throws IOException if loading, text extraction or closing a resource fails
 * @throws DocumentAccessException declared for callers; not thrown directly
 *         by the statements of this method
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
	if (log.isDebug()) {
		log.debug("readContent from pdf starts...");
	}

	PDDocument document = null;
	BufferedInputStream bis = null;
	try {
		bis = new BufferedInputStream(leaf.getInputStream());
		document = PDDocument.load(bis);
		if (document.isEncrypted()) {
			try {
				document.decrypt("");
			} catch (Exception e) {
				log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
				// Fallback: index at least the file name.
				LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
				writer.append(leaf.getName());
				writer.close();
				return new FileContent(leaf.getName(), writer.toString());
			}
		}

		String title = getTitle(document);
		if (log.isDebug()) {
			log.debug("readContent PDDocument loaded");
		}

		PDFTextStripper stripper = new PDFTextStripper();
		LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
		stripper.writeText(document, writer);
		writer.close();
		return new FileContent(title, writer.toString());
	} finally {
		// Nested finally: the input stream is closed even if closing the
		// document throws, so the stream cannot leak.
		try {
			if (document != null) {
				document.close();
			}
		} finally {
			if (bis != null) {
				bis.close();
			}
		}
	}
}
Use of org.olat.core.util.io.LimitedContentWriter in project OpenOLAT by OpenOLAT.
The class ExcelDocument, method readContent.
/**
 * Extracts the text of a binary Excel (.xls) workbook for indexing.
 * <p>
 * Every sheet, row and cell is visited; only cells of type
 * {@link CellType#STRING} contribute to the content, each followed by a
 * single space. Missing sheets, rows and cells are counted and reported
 * on debug level.
 *
 * @param leaf the Excel file to read
 * @return the concatenated string cell values
 * @throws IOException declared by the overridden method; failures inside the
 *             workbook handling are wrapped in a {@link DocumentException}
 * @throws DocumentException if the workbook cannot be opened or read
 */
@Override
protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
	int cellNullCounter = 0;
	int rowNullCounter = 0;
	int sheetNullCounter = 0;

	try (BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
			HSSFWorkbook workbook = new HSSFWorkbook(new POIFSFileSystem(bis))) {
		LimitedContentWriter content = new LimitedContentWriter((int) leaf.getSize(), FileDocumentFactory.getMaxFileSize());
		for (int sheetNumber = 0; sheetNumber < workbook.getNumberOfSheets(); sheetNumber++) {
			HSSFSheet sheet = workbook.getSheetAt(sheetNumber);
			if (sheet == null) {
				sheetNullCounter++;
				continue;
			}
			// getLastRowNum() is the 0-based index of the last row, hence "<=".
			for (int rowNumber = sheet.getFirstRowNum(); rowNumber <= sheet.getLastRowNum(); rowNumber++) {
				HSSFRow row = sheet.getRow(rowNumber);
				if (row == null) {
					rowNullCounter++;
					continue;
				}
				// getLastCellNum() is the index of the last cell PLUS ONE, hence "<".
				// With "<=" one non-existent cell per row (and index -1 on empty
				// rows) was probed and wrongly counted as a null cell.
				for (int cellNumber = row.getFirstCellNum(); cellNumber < row.getLastCellNum(); cellNumber++) {
					HSSFCell cell = row.getCell(cellNumber);
					if (cell == null) {
						cellNullCounter++;
					} else if (cell.getCellTypeEnum() == CellType.STRING) {
						content.append(cell.getStringCellValue()).append(' ');
					}
				}
			}
		}

		boolean anyNull = (cellNullCounter > 0) || (rowNullCounter > 0) || (sheetNullCounter > 0);
		if (log.isDebug() && anyNull) {
			log.debug("Read Excel content cell=null #:" + cellNullCounter + ", row=null #:" + rowNullCounter + ", sheet=null #:" + sheetNullCounter);
		}
		content.close();
		return new FileContent(content.toString());
	} catch (Exception ex) {
		throw new DocumentException("Can not read XLS Content. File=" + leaf.getName(), ex);
	}
}
Use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
The class ExcelDocument, method readContent.
/**
 * Reads a binary Excel (.xls) workbook and returns the text of its string cells.
 * <p>
 * All sheets are walked row by row and cell by cell. Only cells whose type
 * is {@link CellType#STRING} are appended to the result, separated by a
 * space. The number of {@code null} sheets, rows and cells encountered is
 * logged on debug level when at least one of them is non-zero.
 *
 * @param leaf the Excel file to read
 * @return the collected string cell values
 * @throws IOException declared by the overridden method; errors raised while
 *             reading are wrapped in a {@link DocumentException}
 * @throws DocumentException if anything goes wrong while opening or reading the workbook
 */
@Override
protected FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
	int cellNullCounter = 0;
	int rowNullCounter = 0;
	int sheetNullCounter = 0;

	try (BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
			HSSFWorkbook workbook = new HSSFWorkbook(new POIFSFileSystem(bis))) {
		LimitedContentWriter content = new LimitedContentWriter((int) leaf.getSize(), FileDocumentFactory.getMaxFileSize());
		int numberOfSheets = workbook.getNumberOfSheets();
		for (int sheetNumber = 0; sheetNumber < numberOfSheets; sheetNumber++) {
			HSSFSheet sheet = workbook.getSheetAt(sheetNumber);
			if (sheet != null) {
				// Row bounds are both inclusive 0-based indexes.
				for (int rowNumber = sheet.getFirstRowNum(); rowNumber <= sheet.getLastRowNum(); rowNumber++) {
					HSSFRow row = sheet.getRow(rowNumber);
					if (row != null) {
						// The upper cell bound is exclusive: getLastCellNum() returns the
						// last cell index plus one (and -1 for a row without cells), so
						// "<=" would always visit one cell that cannot exist.
						for (int cellNumber = row.getFirstCellNum(); cellNumber < row.getLastCellNum(); cellNumber++) {
							HSSFCell cell = row.getCell(cellNumber);
							if (cell != null) {
								if (cell.getCellTypeEnum() == CellType.STRING) {
									content.append(cell.getStringCellValue()).append(' ');
								}
							} else {
								cellNullCounter++;
							}
						}
					} else {
						rowNullCounter++;
					}
				}
			} else {
				sheetNullCounter++;
			}
		}

		if (log.isDebug() && ((cellNullCounter > 0) || (rowNullCounter > 0) || (sheetNullCounter > 0))) {
			log.debug("Read Excel content cell=null #:" + cellNullCounter + ", row=null #:" + rowNullCounter + ", sheet=null #:" + sheetNullCounter);
		}
		content.close();
		return new FileContent(content.toString());
	} catch (Exception ex) {
		throw new DocumentException("Can not read XLS Content. File=" + leaf.getName(), ex);
	}
}
Use of org.olat.core.util.io.LimitedContentWriter in project openolat by klemens.
The class PdfDocument, method getPdfTextFromBuffer.
/**
 * Reads a previously extracted PDF text from a buffer file.
 * <p>
 * The first chunk read from the file (at most 4096 characters) is searched
 * for the separator {@code NBSP | NBSP}. If the separator is found at an
 * index greater than zero, everything before it becomes the title and
 * everything after it is the start of the content; otherwise the title is
 * empty and the whole chunk is content. The rest of the file is appended
 * to the content unchanged.
 *
 * @param pdfTextFile the buffer file holding the extracted text
 * @return title and content read from the file; both are empty when nothing could be read
 * @throws IOException if reading the file fails
 */
private FileContent getPdfTextFromBuffer(File pdfTextFile) throws IOException {
	if (log.isDebug()) {
		log.debug("readContent from text file start...");
	}

	// NOTE(review): FileReader decodes with the platform default charset -
	// confirm this matches the encoding used when the buffer file was written.
	try (BufferedReader reader = new BufferedReader(new FileReader(pdfTextFile));
			LimitedContentWriter text = new LimitedContentWriter(5000, FileDocumentFactory.getMaxFileSize())) {
		char[] buffer = new char[4096];
		int read = reader.read(buffer);
		if (read <= 0) {
			// Nothing readable: empty title, empty content.
			return new FileContent("", text.toString());
		}

		// The title, if any, is looked up in the first chunk only.
		String head = new String(buffer, 0, read);
		int separatorAt = head.indexOf("\u00A0|\u00A0");
		String title;
		if (separatorAt > 0) {
			title = head.substring(0, separatorAt);
			// Skip the three separator characters.
			text.append(head.substring(separatorAt + 3));
		} else {
			title = "";
			text.append(head);
		}

		// Copy the remainder of the file.
		for (int count = reader.read(buffer); count > 0; count = reader.read(buffer)) {
			text.write(buffer, 0, count);
		}
		return new FileContent(title, text.toString());
	}
}
Aggregations