use of org.xml.sax.helpers.AttributesImpl in project tika by apache.
the class OutlookPSTParser method createAttribute.
/**
 * Builds a SAX attribute list that contains exactly one attribute.
 *
 * <p>The attribute is registered with an empty namespace URI, with
 * {@code attName} serving as both its local name and its qualified name,
 * and with the attribute type {@code "CDATA"}.
 *
 * @param attName  name of the attribute (used as local name and qName)
 * @param attValue value assigned to the attribute
 * @return a new {@link AttributesImpl} holding only that attribute
 */
private static AttributesImpl createAttribute(String attName, String attValue) {
    final AttributesImpl singleAttribute = new AttributesImpl();
    singleAttribute.addAttribute(
            "",        // namespace URI
            attName,   // local name
            attName,   // qualified name
            "CDATA",   // attribute type
            attValue);
    return singleAttribute;
}
use of org.xml.sax.helpers.AttributesImpl in project webservices-axiom by apache.
the class TestGetSAXResultWithDTD method runTest.
/**
 * Feeds a minimal SAX event stream that includes a DTD declaration into the
 * {@link SAXResult} obtained from a freshly created {@code root} element, then
 * checks that the first child of {@code root} is an element whose local name
 * is {@code test}.
 *
 * @throws Throwable if any SAX callback or assertion fails
 */
@Override
protected void runTest() throws Throwable {
    OMElement parent = metaFactory.getOMFactory().createOMElement("root", null);
    SAXResult saxResult = parent.getSAXResult();
    LexicalHandler dtdEvents = saxResult.getLexicalHandler();
    ContentHandler contentEvents = saxResult.getHandler();

    // Emit: document start, DTD start/end, one empty <test/> element, document end.
    contentEvents.startDocument();
    dtdEvents.startDTD("test", null, "my.dtd");
    dtdEvents.endDTD();
    contentEvents.startElement("", "test", "test", new AttributesImpl());
    contentEvents.endElement("", "test", "test");
    contentEvents.endDocument();

    // The element produced by the event stream must be the first child of root.
    OMNode firstChild = parent.getFirstOMChild();
    assertTrue(firstChild instanceof OMElement);
    OMElement produced = (OMElement) firstChild;
    assertEquals("test", produced.getLocalName());
}
use of org.xml.sax.helpers.AttributesImpl in project tika by apache.
the class AbstractPDF2XHTML method endPage.
/**
 * Finishes processing of a page.
 *
 * <p>For every annotation on the page this method:
 * <ul>
 *   <li>extracts embedded files from file-attachment annotations,</li>
 *   <li>delegates widget annotations to {@code handleWidget},</li>
 *   <li>when {@code config.getExtractAnnotationText()} is enabled, writes the
 *       annotation's URI link and its markup title/subject/contents to the
 *       XHTML output.</li>
 * </ul>
 * Afterwards it optionally runs OCR on the page (OCR_AND_TEXT_EXTRACTION
 * strategy), handles the page's close/open additional actions and closes the
 * currently open {@code div} element. {@code pageIndex} is incremented in all
 * cases.
 *
 * @param page the page that has just been processed
 * @throws IOException if a {@link SAXException} or {@code TikaException} occurs
 *                     (wrapped); plain {@code IOException}s raised in the outer
 *                     scope are recorded in {@code exceptions} instead of thrown
 */
@Override
protected void endPage(PDPage page) throws IOException {
    try {
        for (PDAnnotation annotation : page.getAnnotations()) {
            if (annotation instanceof PDAnnotationFileAttachment) {
                PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;
                // getFile() is not guaranteed to yield a complex file specification;
                // a blind downcast would raise an unchecked ClassCastException that
                // none of the catch clauses below handle. Hold the result untyped and
                // only extract when it really is a complex specification.
                Object fileSpec = fann.getFile();
                if (fileSpec instanceof PDComplexFileSpecification) {
                    try {
                        AttributesImpl attributes = new AttributesImpl();
                        attributes.addAttribute("", "source", "source", "CDATA", "annotation");
                        extractMultiOSPDEmbeddedFiles(fann.getAttachmentName(),
                                (PDComplexFileSpecification) fileSpec, attributes);
                    } catch (SAXException e) {
                        throw new IOExceptionWithCause("file embedded in annotation sax exception", e);
                    } catch (TikaException e) {
                        throw new IOExceptionWithCause("file embedded in annotation tika exception", e);
                    } catch (IOException e) {
                        handleCatchableIOE(e);
                    }
                }
            } else if (annotation instanceof PDAnnotationWidget) {
                handleWidget((PDAnnotationWidget) annotation);
            }
            // TODO: remove once PDFBOX-1143 is fixed:
            if (config.getExtractAnnotationText()) {
                PDActionURI uri = getActionURI(annotation);
                if (uri != null) {
                    String link = uri.getURI();
                    if (link != null && link.trim().length() > 0) {
                        xhtml.startElement("div", "class", "annotation");
                        xhtml.startElement("a", "href", link);
                        xhtml.characters(link);
                        xhtml.endElement("a");
                        xhtml.endElement("div");
                    }
                }
                if (annotation instanceof PDAnnotationMarkup) {
                    PDAnnotationMarkup annotationMarkup = (PDAnnotationMarkup) annotation;
                    String title = annotationMarkup.getTitlePopup();
                    String subject = annotationMarkup.getSubject();
                    String contents = annotationMarkup.getContents();
                    // TODO: maybe also annotationMarkup.getRichContents()?
                    if (title != null || subject != null || contents != null) {
                        xhtml.startElement("div", "class", "annotation");
                        if (title != null) {
                            xhtml.startElement("div", "class", "annotationTitle");
                            xhtml.characters(title);
                            xhtml.endElement("div");
                        }
                        if (subject != null) {
                            xhtml.startElement("div", "class", "annotationSubject");
                            xhtml.characters(subject);
                            xhtml.endElement("div");
                        }
                        if (contents != null) {
                            xhtml.startElement("div", "class", "annotationContents");
                            xhtml.characters(contents);
                            xhtml.endElement("div");
                        }
                        xhtml.endElement("div");
                    }
                }
            }
        }
        if (config.getOcrStrategy().equals(PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION)) {
            doOCROnCurrentPage();
        }
        PDPageAdditionalActions pageActions = page.getActions();
        if (pageActions != null) {
            handleDestinationOrAction(pageActions.getC(), ActionTrigger.PAGE_CLOSE);
            handleDestinationOrAction(pageActions.getO(), ActionTrigger.PAGE_OPEN);
        }
        xhtml.endElement("div");
    } catch (SAXException | TikaException e) {
        throw new IOExceptionWithCause("Unable to end a page", e);
    } catch (IOException e) {
        // Recorded rather than rethrown so that processing can continue.
        exceptions.add(e);
    } finally {
        pageIndex++;
    }
}
use of org.xml.sax.helpers.AttributesImpl in project tika by apache.
the class AbstractPDF2XHTML method handleSignature.
/**
 * Writes the data of a signature field to the XHTML output as a nested list.
 *
 * <p>The signature's name, contact info, location, reason and (if present)
 * signing date are collected. If at least one of them is non-null and
 * non-empty, an {@code li} element carrying {@code parentAttributes} is
 * emitted containing an {@code ol type="signaturedata"} with one
 * {@code li signdata="<key>"} per non-empty value. If the field has no
 * signature, or none of the values carries data, nothing is written.
 *
 * @param parentAttributes attributes placed on the enclosing {@code li}
 * @param sigField         the signature field to report
 * @throws SAXException if writing to the XHTML handler fails
 */
private void handleSignature(AttributesImpl parentAttributes, PDSignatureField sigField) throws SAXException {
    PDSignature sig = sigField.getSignature();
    if (sig == null) {
        return;
    }
    Map<String, String> vals = new TreeMap<>();
    vals.put("name", sig.getName());
    vals.put("contactInfo", sig.getContactInfo());
    vals.put("location", sig.getLocation());
    vals.put("reason", sig.getReason());
    Calendar cal = sig.getSignDate();
    if (cal != null) {
        dateFormat.setTimeZone(cal.getTimeZone());
        vals.put("date", dateFormat.format(cal.getTime()));
    }
    // See if there is any data. The check must look at the VALUES: the keys
    // are fixed, non-empty labels, so testing them would always report data
    // and produce an empty list for signatures without any content.
    int nonNull = 0;
    for (String val : vals.values()) {
        if (val != null && !val.equals("")) {
            nonNull++;
        }
    }
    // If there is, process it.
    if (nonNull > 0) {
        xhtml.startElement("li", parentAttributes);
        AttributesImpl attrs = new AttributesImpl();
        attrs.addAttribute("", "type", "type", "CDATA", "signaturedata");
        xhtml.startElement("ol", attrs);
        for (Map.Entry<String, String> e : vals.entrySet()) {
            if (e.getValue() == null || e.getValue().equals("")) {
                continue;
            }
            attrs = new AttributesImpl();
            attrs.addAttribute("", "signdata", "signdata", "CDATA", e.getKey());
            xhtml.startElement("li", attrs);
            xhtml.characters(e.getValue());
            xhtml.endElement("li");
        }
        xhtml.endElement("ol");
        xhtml.endElement("li");
    }
}
use of org.xml.sax.helpers.AttributesImpl in project tika by apache.
the class AbstractPDF2XHTML method handleDestinationOrAction.
/**
 * Reports a PDF action or destination in the XHTML output.
 *
 * <p>Does nothing when {@code action} is {@code null} or action extraction is
 * disabled in the config. Otherwise a set of attributes ({@code class},
 * {@code type}, {@code trigger}) is prepared and then, depending on the
 * concrete action type:
 * <ul>
 *   <li>import-data, launch and remote-go-to actions are handed to
 *       {@code processDoc} together with their file specification;</li>
 *   <li>JavaScript actions have their script offered to the embedded document
 *       extractor and are then written as an empty {@code div};</li>
 *   <li>anything else is written as an empty {@code div} with the prepared
 *       attributes.</li>
 * </ul>
 *
 * @param action        the action or destination to report; may be {@code null}
 * @param actionTrigger the trigger under which the action fires
 * @throws IOException   propagated from the called helpers
 * @throws SAXException  propagated from the XHTML handler or helpers
 * @throws TikaException propagated from the called helpers
 */
private void handleDestinationOrAction(PDDestinationOrAction action, ActionTrigger actionTrigger) throws IOException, SAXException, TikaException {
    if (action == null || !config.getExtractActions()) {
        return;
    }

    AttributesImpl attrs = new AttributesImpl();
    String kind;
    if (action instanceof PDAction) {
        kind = "action";
    } else {
        kind = "destination";
    }
    addNonNullAttribute("class", kind, attrs);
    addNonNullAttribute("type", action.getClass().getSimpleName(), attrs);
    addNonNullAttribute("trigger", actionTrigger.name(), attrs);

    if (action instanceof PDActionImportData) {
        processDoc("", ((PDActionImportData) action).getFile(), attrs);
        return;
    }

    if (action instanceof PDActionLaunch) {
        PDActionLaunch launch = (PDActionLaunch) action;
        addNonNullAttribute("id", launch.getF(), attrs);
        addNonNullAttribute("defaultDirectory", launch.getD(), attrs);
        addNonNullAttribute("operation", launch.getO(), attrs);
        addNonNullAttribute("parameters", launch.getP(), attrs);
        processDoc(launch.getF(), launch.getFile(), attrs);
        return;
    }

    if (action instanceof PDActionRemoteGoTo) {
        processDoc("", ((PDActionRemoteGoTo) action).getFile(), attrs);
        return;
    }

    if (action instanceof PDActionJavaScript) {
        PDActionJavaScript scriptAction = (PDActionJavaScript) action;

        // Describe the script as an embedded UTF-8 JavaScript macro resource.
        Metadata scriptMetadata = new Metadata();
        scriptMetadata.set(Metadata.CONTENT_TYPE, "application/javascript");
        scriptMetadata.set(Metadata.CONTENT_ENCODING, StandardCharsets.UTF_8.toString());
        scriptMetadata.set(PDF.ACTION_TRIGGER, actionTrigger.toString());
        scriptMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.name());

        // A missing script body is treated as an empty script.
        String script = scriptAction.getAction();
        if (script == null) {
            script = "";
        }

        if (embeddedDocumentExtractor.shouldParseEmbedded(scriptMetadata)) {
            try (InputStream scriptStream = TikaInputStream.get(script.getBytes(StandardCharsets.UTF_8))) {
                embeddedDocumentExtractor.parseEmbedded(scriptStream, xhtml, scriptMetadata, false);
            }
        }

        // NOTE(review): "class" and "type" were already added to attrs above;
        // whether these calls replace or duplicate those entries depends on
        // addNonNullAttribute, which is not visible here -- confirm.
        addNonNullAttribute("class", "javascript", attrs);
        addNonNullAttribute("type", scriptAction.getType(), attrs);
        addNonNullAttribute("subtype", scriptAction.getSubType(), attrs);
        xhtml.startElement("div", attrs);
        xhtml.endElement("div");
        return;
    }

    // Any other action/destination type: emit an empty marker element.
    xhtml.startElement("div", attrs);
    xhtml.endElement("div");
}
Aggregations