Use of org.olat.search.service.document.file.DocumentAccessException in project OpenOLAT by OpenOLAT.
Class PdfBoxExtractor, method extractTextFromPdf.
/**
 * Reads a PDF file and returns its title together with its extracted text.
 * <p>
 * If the document is encrypted and decrypting it with an empty password fails,
 * no text is extracted: the file name is returned as both title and content.
 * <p>
 * The input stream and the PDF document are released independently, so a failure
 * while closing the document can no longer leave the stream open.
 *
 * @param leaf the file to read the PDF from
 * @return the title obtained from {@code getTitle(document)} and the stripped text
 * @throws IOException if loading, reading or closing the document fails
 * @throws DocumentAccessException declared for callers; not thrown directly by the code visible here
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug())
        log.debug("readContent from pdf starts...");
    BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
    try {
        PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    // Many PDFs are only "owner" protected and open with an empty password.
                    document.decrypt("");
                } catch (Exception e) {
                    // Best effort: index the file by its name only.
                    log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                    LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
                    writer.append(leaf.getName());
                    writer.close();
                    return new FileContent(leaf.getName(), writer.toString());
                }
            }
            String title = getTitle(document);
            if (log.isDebug())
                log.debug("readContent PDDocument loaded");
            PDFTextStripper stripper = new PDFTextStripper();
            LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
            stripper.writeText(document, writer);
            writer.close();
            return new FileContent(title, writer.toString());
        } finally {
            // Close the document first; the outer finally still runs if this throws.
            document.close();
        }
    } finally {
        bis.close();
    }
}
Use of org.olat.search.service.document.file.DocumentAccessException in project OpenOLAT by OpenOLAT.
Class FolderIndexerWorker, method doIndexVFSLeaf.
/**
 * Indexes a single file if its type is supported by the document factory.
 * <p>
 * The file path set on the resource context is {@code fPath + "/" + leaf.getName()}.
 * No exception leaves this method: failures are logged and the leaf is skipped.
 * When the indexing is interrupted, the thread's interrupt status is restored so
 * that callers further up can still detect the interruption.
 *
 * @param leafResourceContext the search context that receives the file path
 * @param leaf the file to index
 * @param writer the indexer the created document is added to
 * @param fPath the path of the folder containing the leaf
 */
protected void doIndexVFSLeaf(SearchResourceContext leafResourceContext, VFSLeaf leaf, OlatFullIndexer writer, String fPath) {
    if (log.isDebug())
        log.debug("Analyse VFSLeaf=" + leaf.getName());
    try {
        if (docFactory.isFileSupported(leaf)) {
            String myFilePath = fPath + "/" + leaf.getName();
            leafResourceContext.setFilePath(myFilePath);
            Document document = docFactory.createDocument(leafResourceContext, leaf);
            if (document != null) {
                // document which are disabled return null
                writer.addDocument(document);
            }
        } else {
            if (log.isDebug())
                log.debug("Documenttype not supported. file=" + leaf.getName());
        }
    } catch (DocumentAccessException e) {
        if (log.isDebug())
            log.debug("Can not access document." + e.getMessage());
    } catch (InterruptedException e) {
        // This method cannot rethrow, so re-set the flag instead of losing the interruption.
        Thread.currentThread().interrupt();
        if (log.isDebug())
            log.debug("InterruptedException: Can not index leaf=" + leaf.getName() + ";" + e.getMessage());
    } catch (IOException ioEx) {
        log.warn("IOException: Can not index leaf=" + leaf.getName(), ioEx);
    } catch (Exception ex) {
        log.warn("Exception: Can not index leaf=" + leaf.getName(), ex);
    }
}
Use of org.olat.search.service.document.file.DocumentAccessException in project OpenOLAT by OpenOLAT.
Class DialogCourseNodeIndexer, method doIndexFile.
/**
 * Index the file of a dialog element.
 * <p>
 * The first leaf found in the element's dialog container is indexed when its type is
 * supported. Access problems, I/O errors and unexpected exceptions are logged and
 * swallowed; only an interruption is propagated to the caller.
 *
 * @param element the dialog element whose file is indexed
 * @param leafResourceContext the search context that receives file path and document type
 * @param indexWriter the indexer the created document is added to
 * @throws IOException declared for callers; I/O errors raised while indexing are logged here
 * @throws InterruptedException if the indexing was interrupted
 */
private void doIndexFile(DialogElement element, SearchResourceContext leafResourceContext, OlatFullIndexer indexWriter) throws IOException, InterruptedException {
    DialogElementsManager dialogElmsMgr = CoreSpringFactory.getImpl(DialogElementsManager.class);
    VFSContainer dialogContainer = dialogElmsMgr.getDialogContainer(element);
    // NOTE(review): assumes the container holds at least one leaf; get(0) throws otherwise - confirm with callers.
    VFSLeaf leaf = (VFSLeaf) dialogContainer.getItems(new VFSLeafFilter()).get(0);
    if (isLogDebugEnabled())
        logDebug("Analyse VFSLeaf=" + leaf.getName());
    try {
        FileDocumentFactory documentFactory = CoreSpringFactory.getImpl(FileDocumentFactory.class);
        if (documentFactory.isFileSupported(leaf)) {
            leafResourceContext.setFilePath(element.getFilename());
            leafResourceContext.setDocumentType(TYPE_FILE);
            Document document = documentFactory.createDocument(leafResourceContext, leaf);
            // Guard against a missing document instead of handing null to the indexer
            // (presumably the factory can return null, e.g. for disabled documents - verify).
            if (document != null) {
                indexWriter.addDocument(document);
            }
        } else {
            if (isLogDebugEnabled())
                logDebug("Documenttype not supported. file=" + leaf.getName());
        }
    } catch (DocumentAccessException e) {
        if (isLogDebugEnabled())
            logDebug("Can not access document." + e.getMessage());
    } catch (IOException ioEx) {
        logWarn("IOException: Can not index leaf=" + leaf.getName(), ioEx);
    } catch (InterruptedException iex) {
        // Rethrow the original exception so its stack trace is preserved.
        throw iex;
    } catch (Exception ex) {
        logWarn("Exception: Can not index leaf=" + leaf.getName(), ex);
    }
}
Use of org.olat.search.service.document.file.DocumentAccessException in project openolat by klemens.
Class PdfBoxExtractor, method extractTextFromPdf.
/**
 * Reads a PDF file and returns its title together with its extracted text.
 * <p>
 * If the document is encrypted and decrypting it with an empty password fails,
 * no text is extracted: the file name is returned as both title and content.
 * <p>
 * The input stream and the PDF document are released independently, so a failure
 * while closing the document can no longer leave the stream open.
 *
 * @param leaf the file to read the PDF from
 * @return the title obtained from {@code getTitle(document)} and the stripped text
 * @throws IOException if loading, reading or closing the document fails
 * @throws DocumentAccessException declared for callers; not thrown directly by the code visible here
 */
private FileContent extractTextFromPdf(VFSLeaf leaf) throws IOException, DocumentAccessException {
    if (log.isDebug())
        log.debug("readContent from pdf starts...");
    BufferedInputStream bis = new BufferedInputStream(leaf.getInputStream());
    try {
        PDDocument document = PDDocument.load(bis);
        try {
            if (document.isEncrypted()) {
                try {
                    // Many PDFs are only "owner" protected and open with an empty password.
                    document.decrypt("");
                } catch (Exception e) {
                    // Best effort: index the file by its name only.
                    log.warn("PDF is encrypted. Can not read content file=" + leaf.getName());
                    LimitedContentWriter writer = new LimitedContentWriter(128, FileDocumentFactory.getMaxFileSize());
                    writer.append(leaf.getName());
                    writer.close();
                    return new FileContent(leaf.getName(), writer.toString());
                }
            }
            String title = getTitle(document);
            if (log.isDebug())
                log.debug("readContent PDDocument loaded");
            PDFTextStripper stripper = new PDFTextStripper();
            LimitedContentWriter writer = new LimitedContentWriter(50000, FileDocumentFactory.getMaxFileSize());
            stripper.writeText(document, writer);
            writer.close();
            return new FileContent(title, writer.toString());
        } finally {
            // Close the document first; the outer finally still runs if this throws.
            document.close();
        }
    } finally {
        bis.close();
    }
}
Use of org.olat.search.service.document.file.DocumentAccessException in project openolat by klemens.
Class DialogCourseNodeIndexer, method doIndexFile.
/**
 * Index the file of a dialog element.
 * <p>
 * The first leaf found in the element's dialog container is indexed when its type is
 * supported. Access problems, I/O errors and unexpected exceptions are logged and
 * swallowed; only an interruption is propagated to the caller.
 *
 * @param element the dialog element whose file is indexed
 * @param leafResourceContext the search context that receives file path and document type
 * @param indexWriter the indexer the created document is added to
 * @throws IOException declared for callers; I/O errors raised while indexing are logged here
 * @throws InterruptedException if the indexing was interrupted
 */
private void doIndexFile(DialogElement element, SearchResourceContext leafResourceContext, OlatFullIndexer indexWriter) throws IOException, InterruptedException {
    DialogElementsManager dialogElmsMgr = CoreSpringFactory.getImpl(DialogElementsManager.class);
    VFSContainer dialogContainer = dialogElmsMgr.getDialogContainer(element);
    // NOTE(review): assumes the container holds at least one leaf; get(0) throws otherwise - confirm with callers.
    VFSLeaf leaf = (VFSLeaf) dialogContainer.getItems(new VFSLeafFilter()).get(0);
    if (isLogDebugEnabled())
        logDebug("Analyse VFSLeaf=" + leaf.getName());
    try {
        FileDocumentFactory documentFactory = CoreSpringFactory.getImpl(FileDocumentFactory.class);
        if (documentFactory.isFileSupported(leaf)) {
            leafResourceContext.setFilePath(element.getFilename());
            leafResourceContext.setDocumentType(TYPE_FILE);
            Document document = documentFactory.createDocument(leafResourceContext, leaf);
            // Guard against a missing document instead of handing null to the indexer
            // (presumably the factory can return null, e.g. for disabled documents - verify).
            if (document != null) {
                indexWriter.addDocument(document);
            }
        } else {
            if (isLogDebugEnabled())
                logDebug("Documenttype not supported. file=" + leaf.getName());
        }
    } catch (DocumentAccessException e) {
        if (isLogDebugEnabled())
            logDebug("Can not access document." + e.getMessage());
    } catch (IOException ioEx) {
        logWarn("IOException: Can not index leaf=" + leaf.getName(), ioEx);
    } catch (InterruptedException iex) {
        // Rethrow the original exception so its stack trace is preserved.
        throw iex;
    } catch (Exception ex) {
        logWarn("Exception: Can not index leaf=" + leaf.getName(), ex);
    }
}
Aggregations