Search in sources :

Example 91 with AttributesImpl

use of org.xml.sax.helpers.AttributesImpl in project tika by apache.

the class OutlookPSTParser method createAttribute.

/**
 * Builds a new SAX attribute list holding exactly one attribute.
 *
 * The attribute is added with an empty namespace URI, with {@code attName}
 * used as both local name and qualified name, and with type {@code "CDATA"}.
 *
 * @param attName  name of the attribute (used for local name and qName)
 * @param attValue value of the attribute
 * @return a freshly allocated {@link AttributesImpl} containing that attribute
 */
private static AttributesImpl createAttribute(String attName, String attValue) {
    final AttributesImpl singleAttribute = new AttributesImpl();
    // No namespace is used, so the same string serves as local name and qName.
    singleAttribute.addAttribute("", attName, attName, "CDATA", attValue);
    return singleAttribute;
}
Also used : AttributesImpl(org.xml.sax.helpers.AttributesImpl)

Example 92 with AttributesImpl

use of org.xml.sax.helpers.AttributesImpl in project webservices-axiom by apache.

the class TestGetSAXResultWithDTD method runTest.

/**
 * Pushes a minimal SAX event stream that includes a DTD declaration
 * (startDTD/endDTD followed by a single empty {@code test} element) into the
 * {@link SAXResult} of a freshly created {@code root} element, then asserts
 * that the first child of {@code root} is an element whose local name is
 * {@code "test"}.
 */
@Override
protected void runTest() throws Throwable {
    OMElement parent = metaFactory.getOMFactory().createOMElement("root", null);
    SAXResult saxResult = parent.getSAXResult();
    LexicalHandler dtdSink = saxResult.getLexicalHandler();
    ContentHandler contentSink = saxResult.getHandler();

    // Event sequence: document start, DTD, one empty element, document end.
    contentSink.startDocument();
    dtdSink.startDTD("test", null, "my.dtd");
    dtdSink.endDTD();
    AttributesImpl noAttributes = new AttributesImpl();
    contentSink.startElement("", "test", "test", noAttributes);
    contentSink.endElement("", "test", "test");
    contentSink.endDocument();

    // The first node under the parent must be the element that was written.
    OMNode firstChild = parent.getFirstOMChild();
    assertTrue(firstChild instanceof OMElement);
    OMElement written = (OMElement) firstChild;
    assertEquals("test", written.getLocalName());
}
Also used : OMNode(org.apache.axiom.om.OMNode) AttributesImpl(org.xml.sax.helpers.AttributesImpl) SAXResult(javax.xml.transform.sax.SAXResult) LexicalHandler(org.xml.sax.ext.LexicalHandler) OMElement(org.apache.axiom.om.OMElement) ContentHandler(org.xml.sax.ContentHandler)

Example 93 with AttributesImpl

use of org.xml.sax.helpers.AttributesImpl in project tika by apache.

the class AbstractPDF2XHTML method endPage.

/**
 * Finishes processing of a PDF page: walks the page's annotations, optionally
 * runs OCR, handles the page's close/open actions and closes a {@code div}
 * element on the XHTML output.
 *
 * For each annotation:
 * - file-attachment annotations are handed to
 *   {@code extractMultiOSPDEmbeddedFiles} with a {@code source="annotation"}
 *   attribute;
 * - widget annotations are handed to {@code handleWidget};
 * - when {@code config.getExtractAnnotationText()} is true, URI links and the
 *   title/subject/contents of markup annotations are written as nested
 *   {@code div} elements.
 *
 * {@code pageIndex} is incremented in all cases, including on failure.
 *
 * @param page the page that has just been processed
 * @throws IOException wrapping a {@link SAXException} or {@link TikaException}
 *         raised directly in the outer try body; an {@link IOException}
 *         caught by the outer handler is recorded in {@code exceptions}
 *         instead of being rethrown
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        for (PDAnnotation annotation : page.getAnnotations()) {
            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                // NOTE(review): unchecked downcast — this throws ClassCastException if
                // getFile() ever returns a different file-specification type; confirm.
                PDComplexFileSpecification fileSpec = (PDComplexFileSpecification) fann.getFile();
                try {
                    // Tag the embedded file so consumers can tell it came from an annotation.
                    AttributesImpl attributes = new AttributesImpl();
                    attributes.addAttribute("", "source", "source", "CDATA", "annotation");
                    extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(), fileSpec, attributes);
                } catch (SAXException e) {
                    // NOTE(review): if IOExceptionWithCause is an IOException subclass, this
                    // rethrow is caught by the outer catch (IOException) and only recorded
                    // in `exceptions` rather than propagated — confirm this is intended.
                    throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                } catch (TikaException e) {
                    throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                } catch (IOException e) {
                    // IOExceptions from the extraction itself are delegated to the shared handler.
                    handleCatchableIOE(e);
                }
            } else if (annotation instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annotation);
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                PDActionURI uri = getActionURI(annotation);
                if (uri != null) {
                    String link = uri.getURI();
                    // Skip null or whitespace-only links.
                    if (link != null && link.trim().length() > 0) {
                        xhtml.startElement("div", "class", "annotation");
                        xhtml.startElement("a", "href", link);
                        xhtml.characters(link);
                        xhtml.endElement("a");
                        xhtml.endElement("div");
                    }
                }
                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    // Emit the wrapper div only when at least one of the three fields is present.
                    if (title != null || subject != null || contents != null) {
                        xhtml.startElement("div", "class", "annotation");
                        if (title != null) {
                            xhtml.startElement("div", "class", "annotationTitle");
                            xhtml.characters(title);
                            xhtml.endElement("div");
                        }
                        if (subject != null) {
                            xhtml.startElement("div", "class", "annotationSubject");
                            xhtml.characters(subject);
                            xhtml.endElement("div");
                        }
                        if (contents != null) {
                            xhtml.startElement("div", "class", "annotationContents");
                            xhtml.characters(contents);
                            xhtml.endElement("div");
                        }
                        xhtml.endElement("div");
                    }
                }
            }
        }
        // OCR runs here only for the combined OCR-and-text strategy.
        if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
            doOCROnCurrentPage();
        }
        PDPageAdditionalActions pageActions = page.getActions();
        if (pageActions != null) {
            // The close action (getC) is handled before the open action (getO).
            handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        // Presumably closes the per-page div opened when the page started — confirm against startPage.
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        // Record the failure and continue; it is not rethrown from here.
        exceptions.add(e);
    } finally {
        // Advance the page counter whether or not the page ended cleanly.
        pageIndex++;
    }
}
Also used : TikaException(org.apache.tika.exception.TikaException) PDAnnotationFileAttachment(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment) PDAnnotation(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation) PDAnnotationMarkup(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup) IOException(java.io.IOException) PDComplexFileSpecification(org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification) SAXException(org.xml.sax.SAXException) PDPageAdditionalActions(org.apache.pdfbox.pdmodel.interactive.action.PDPageAdditionalActions) IOExceptionWithCause(org.apache.commons.io.IOExceptionWithCause) AttributesImpl(org.xml.sax.helpers.AttributesImpl) PDAnnotationWidget(org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget) PDActionURI(org.apache.pdfbox.pdmodel.interactive.action.PDActionURI)

Example 94 with AttributesImpl

use of org.xml.sax.helpers.AttributesImpl in project tika by apache.

the class AbstractPDF2XHTML method handleSignature.

/**
 * Writes the data of a signature field as a nested list:
 * an {@code li} carrying {@code parentAttributes}, containing an
 * {@code ol type="signaturedata"} with one {@code li signdata="<key>"} per
 * non-empty value (name, contactInfo, location, reason, date).
 *
 * Nothing is written when the field has no signature or when every collected
 * value is null or empty.
 *
 * @param parentAttributes attributes for the enclosing {@code li} element
 * @param sigField         signature field whose signature is to be reported
 * @throws SAXException if writing to the XHTML handler fails
 */
private void handleSignature(AttributesImpl parentAttributes, PDSignatureField sigField) throws SAXException {
    PDSignature sig = sigField.getSignature();
    if (sig == null) {
        return;
    }
    // TreeMap keeps the emitted entries in alphabetical key order.
    Map<String, String> vals = new TreeMap<>();
    vals.put("name", sig.getName());
    vals.put("contactInfo", sig.getContactInfo());
    vals.put("location", sig.getLocation());
    vals.put("reason", sig.getReason());
    Calendar cal = sig.getSignDate();
    if (cal != null) {
        // Format the date in the signature's own time zone (mutates the shared formatter).
        dateFormat.setTimeZone(cal.getTimeZone());
        vals.put("date", dateFormat.format(cal.getTime()));
    }
    // See if there is any data. The VALUES must be inspected here: the keys are
    // fixed, non-empty labels, so checking them would always report data and
    // produce an empty list for a signature with no populated fields.
    boolean hasData = false;
    for (String val : vals.values()) {
        if (val != null && !val.equals("")) {
            hasData = true;
            break;
        }
    }
    if (!hasData) {
        return;
    }
    // There is data: emit the wrapper and one item per populated field.
    xhtml.startElement("li", parentAttributes);
    AttributesImpl attrs = new AttributesImpl();
    attrs.addAttribute("", "type", "type", "CDATA", "signaturedata");
    xhtml.startElement("ol", attrs);
    for (Map.Entry<String, String> e : vals.entrySet()) {
        String value = e.getValue();
        if (value == null || value.equals("")) {
            continue;
        }
        attrs = new AttributesImpl();
        attrs.addAttribute("", "signdata", "signdata", "CDATA", e.getKey());
        xhtml.startElement("li", attrs);
        xhtml.characters(value);
        xhtml.endElement("li");
    }
    xhtml.endElement("ol");
    xhtml.endElement("li");
}
Also used : AttributesImpl(org.xml.sax.helpers.AttributesImpl) Calendar(java.util.Calendar) TreeMap(java.util.TreeMap) PDSignature(org.apache.pdfbox.pdmodel.interactive.digitalsignature.PDSignature) Map(java.util.Map)

Example 95 with AttributesImpl

use of org.xml.sax.helpers.AttributesImpl in project tika by apache.

the class AbstractPDF2XHTML method handleDestinationOrAction.

/**
 * Reports a PDF destination or action that was attached to some trigger.
 *
 * Does nothing when {@code action} is null or action extraction is disabled
 * in the config. Otherwise it builds an attribute set ({@code class},
 * {@code type}, {@code trigger}) and dispatches on the concrete action type:
 * - import-data, launch and remote-go-to actions are forwarded to
 *   {@code processDoc} together with their file specification;
 * - JavaScript actions are offered to the embedded document extractor as a
 *   UTF-8 {@code application/javascript} resource and then written as an
 *   empty {@code div};
 * - anything else is written as an empty {@code div} carrying the attributes.
 *
 * @param action        the destination or action to report; may be null
 * @param actionTrigger the trigger the action is bound to
 * @throws IOException   propagated from the calls made while processing the action
 * @throws SAXException  propagated from the calls made while processing the action
 * @throws TikaException propagated from the calls made while processing the action
 */
private void handleDestinationOrAction(PDDestinationOrAction action, ActionTrigger actionTrigger) throws IOException, SAXException, TikaException {
    if (action == null) {
        return;
    }
    if (!config.getExtractActions()) {
        return;
    }

    // Attributes shared by every branch below.
    AttributesImpl attrs = new AttributesImpl();
    String kind;
    if (action instanceof PDAction) {
        kind = "action";
    } else {
        kind = "destination";
    }
    addNonNullAttribute("class", kind, attrs);
    addNonNullAttribute("type", action.getClass().getSimpleName(), attrs);
    addNonNullAttribute("trigger", actionTrigger.name(), attrs);

    if (action instanceof PDActionImportData) {
        PDActionImportData importData = (PDActionImportData) action;
        processDoc("", importData.getFile(), attrs);
        return;
    }

    if (action instanceof PDActionLaunch) {
        PDActionLaunch launch = (PDActionLaunch) action;
        addNonNullAttribute("id", launch.getF(), attrs);
        addNonNullAttribute("defaultDirectory", launch.getD(), attrs);
        addNonNullAttribute("operation", launch.getO(), attrs);
        addNonNullAttribute("parameters", launch.getP(), attrs);
        processDoc(launch.getF(), launch.getFile(), attrs);
        return;
    }

    if (action instanceof PDActionRemoteGoTo) {
        PDActionRemoteGoTo remote = (PDActionRemoteGoTo) action;
        processDoc("", remote.getFile(), attrs);
        return;
    }

    if (action instanceof PDActionJavaScript) {
        PDActionJavaScript scriptAction = (PDActionJavaScript) action;

        // Describe the script as an embedded macro resource.
        Metadata scriptMetadata = new Metadata();
        scriptMetadata.set(Metadata.CONTENT_TYPE, "application/javascript");
        scriptMetadata.set(Metadata.CONTENT_ENCODING, StandardCharsets.UTF_8.toString());
        scriptMetadata.set(PDF.ACTION_TRIGGER, actionTrigger.toString());
        scriptMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.name());

        // A missing script body is treated as an empty script.
        String script = scriptAction.getAction();
        if (script == null) {
            script = "";
        }
        if (embeddedDocumentExtractor.shouldParseEmbedded(scriptMetadata)) {
            try (InputStream scriptStream = TikaInputStream.get(script.getBytes(StandardCharsets.UTF_8))) {
                embeddedDocumentExtractor.parseEmbedded(scriptStream, xhtml, scriptMetadata, false);
            }
        }

        // NOTE(review): "class" and "type" were already added above; depending on how
        // addNonNullAttribute behaves this may yield duplicate attribute names — confirm.
        addNonNullAttribute("class", "javascript", attrs);
        addNonNullAttribute("type", scriptAction.getType(), attrs);
        addNonNullAttribute("subtype", scriptAction.getSubType(), attrs);
    }

    // JavaScript actions and all unrecognised types end as an empty div with the attributes.
    xhtml.startElement("div", attrs);
    xhtml.endElement("div");
}
Also used : PDAction(org.apache.pdfbox.pdmodel.interactive.action.PDAction) PDActionImportData(org.apache.pdfbox.pdmodel.interactive.action.PDActionImportData) PDActionLaunch(org.apache.pdfbox.pdmodel.interactive.action.PDActionLaunch) AttributesImpl(org.xml.sax.helpers.AttributesImpl) BufferedInputStream(java.io.BufferedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) TikaInputStream(org.apache.tika.io.TikaInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) PDActionJavaScript(org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript) PDActionRemoteGoTo(org.apache.pdfbox.pdmodel.interactive.action.PDActionRemoteGoTo)

Aggregations

AttributesImpl (org.xml.sax.helpers.AttributesImpl): 310; SAXException (org.xml.sax.SAXException): 53; Test (org.junit.Test): 34; DiskWriteAttributesImpl (org.apache.geode.internal.cache.DiskWriteAttributesImpl): 23; PartitionAttributesImpl (org.apache.geode.internal.cache.PartitionAttributesImpl): 23; ContentHandler (org.xml.sax.ContentHandler): 21; Attributes (org.xml.sax.Attributes): 17; PreparedStatement (java.sql.PreparedStatement): 16; ResultSet (java.sql.ResultSet): 16; Map (java.util.Map): 16; PackOut (org.adempiere.pipo.PackOut): 16; IOException (java.io.IOException): 15; POSaveFailedException (org.adempiere.pipo.exception.POSaveFailedException): 12; Iterator (java.util.Iterator): 11; TransformerHandler (javax.xml.transform.sax.TransformerHandler): 11; StreamResult (javax.xml.transform.stream.StreamResult): 11; Metadata (org.apache.tika.metadata.Metadata): 11; File (java.io.File): 9; SAXTransformerFactory (javax.xml.transform.sax.SAXTransformerFactory): 9; DatabaseAccessException (org.adempiere.pipo.exception.DatabaseAccessException): 9