use of org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy in project poi by apache.
the class XWPFWordExtractor method getText.
/**
 * Assembles the extracted text of the document in three passes:
 * headers, then every body element (each followed by a newline),
 * then footers.
 *
 * @return the accumulated text
 */
public String getText() {
    XWPFHeaderFooterPolicy policy = document.getHeaderFooterPolicy();
    StringBuffer out = new StringBuffer();

    // Headers go first
    extractHeaders(out, policy);

    // Then the body, one element per line
    for (IBodyElement element : document.getBodyElements()) {
        appendBodyElementText(out, element);
        out.append('\n');
    }

    // Footers go last
    extractFooters(out, policy);

    return out.toString();
}
use of org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy in project tika by apache.
the class XWPFWordExtractorDecorator method extractParagraph.
/**
 * Emits one paragraph to the XHTML handler.
 * <p>
 * In order, this writes: section headers (when the paragraph carries
 * its own section properties), the opening paragraph/heading element,
 * the list number, placeholder {@code div}s for embedded objects,
 * bookmark anchors, the runs (with hyperlink handling), comment and
 * footnote text, any text-box paragraphs (when shape-based content is
 * enabled in the config), the closing element, and finally the section
 * footers.
 *
 * @param paragraph   the paragraph to extract
 * @param listManager passed through to the numbering and header/footer helpers
 * @param xhtml       the handler receiving the output events
 * @throws SAXException if the handler rejects an event
 * @throws XmlException if an embedded text-box paragraph cannot be parsed
 * @throws IOException  declared for the helpers this method delegates to
 */
private void extractParagraph(XWPFParagraph paragraph, XWPFListManager listManager, XHTMLContentHandler xhtml) throws SAXException, XmlException, IOException {
    // If this paragraph is actually a whole new section, then
    // it could have its own headers and footers
    // Check and handle if so
    XWPFHeaderFooterPolicy headerFooterPolicy = null;
    if (paragraph.getCTP().getPPr() != null) {
        CTSectPr ctSectPr = paragraph.getCTP().getPPr().getSectPr();
        if (ctSectPr != null) {
            headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);
            extractHeaders(xhtml, headerFooterPolicy, listManager);
        }
    }

    // Is this a paragraph, or a heading?
    String tag = "p";
    String styleClass = null;
    //TIKA-2144 check that styles is not null
    if (paragraph.getStyleID() != null && styles != null) {
        XWPFStyle style = styles.getStyle(paragraph.getStyleID());
        if (style != null && style.getName() != null) {
            TagAndStyle tas = WordExtractor.buildParagraphTagAndStyle(style.getName(), paragraph.getPartType() == BodyType.TABLECELL);
            tag = tas.getTag();
            styleClass = tas.getStyleClass();
        }
    }
    if (styleClass == null) {
        xhtml.startElement(tag);
    } else {
        xhtml.startElement(tag, "class", styleClass);
    }

    writeParagraphNumber(paragraph, listManager, xhtml);

    // Output a placeholder div for every embedded object in the runs.
    // TODO: replace w/ XPath/XQuery:
    for (XWPFRun run : paragraph.getRuns()) {
        XmlCursor c = run.getCTR().newCursor();
        try {
            c.selectPath("./*");
            while (c.toNextSelection()) {
                XmlObject o = c.getObject();
                if (o instanceof CTObject) {
                    XmlCursor c2 = o.newCursor();
                    try {
                        c2.selectPath("./*");
                        while (c2.toNextSelection()) {
                            XmlObject o2 = c2.getObject();
                            XmlObject embedAtt = o2.selectAttribute(new QName("Type"));
                            // Constant-first comparison: a null node value must not throw
                            if (embedAtt != null && "Embed".equals(embedAtt.getDomNode().getNodeValue())) {
                                // Type is "Embed"
                                XmlObject relIDAtt = o2.selectAttribute(new QName("http://schemas.openxmlformats.org/officeDocument/2006/relationships", "id"));
                                if (relIDAtt != null) {
                                    String relID = relIDAtt.getDomNode().getNodeValue();
                                    AttributesImpl attributes = new AttributesImpl();
                                    attributes.addAttribute("", "class", "class", "CDATA", "embedded");
                                    attributes.addAttribute("", "id", "id", "CDATA", relID);
                                    xhtml.startElement("div", attributes);
                                    xhtml.endElement("div");
                                }
                            }
                        }
                    } finally {
                        // Release the inner cursor even if the handler throws
                        c2.dispose();
                    }
                }
            }
        } finally {
            // Release the outer cursor even if the handler throws
            c.dispose();
        }
    }

    // Emit an anchor for each bookmark that starts in this paragraph
    // (the anchors are placed at the start of the paragraph, not at
    // the bookmark's exact position within it)
    for (int i = 0; i < paragraph.getCTP().sizeOfBookmarkStartArray(); i++) {
        CTBookmark bookmark = paragraph.getCTP().getBookmarkStartArray(i);
        xhtml.startElement("a", "name", bookmark.getName());
        xhtml.endElement("a");
    }

    TmpFormatting fmtg = new TmpFormatting(false, false);
    //hyperlinks may or may not have hyperlink ids
    String lastHyperlinkId = null;
    boolean inHyperlink = false;
    // Do the iruns
    for (IRunElement run : paragraph.getIRuns()) {
        if (run instanceof XWPFHyperlinkRun) {
            XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) run;
            if (hyperlinkRun.getHyperlinkId() == null || !hyperlinkRun.getHyperlinkId().equals(lastHyperlinkId)) {
                // Close any open style tags BEFORE closing the previous
                // link: they were opened inside it, so they must end
                // first to keep the emitted elements properly nested.
                fmtg = closeStyleTags(xhtml, fmtg);
                if (inHyperlink) {
                    //close out the old one
                    xhtml.endElement("a");
                    inHyperlink = false;
                }
                lastHyperlinkId = hyperlinkRun.getHyperlinkId();
                XWPFHyperlink link = hyperlinkRun.getHyperlink(document);
                if (link != null && link.getURL() != null) {
                    xhtml.startElement("a", "href", link.getURL());
                    inHyperlink = true;
                } else if (hyperlinkRun.getAnchor() != null && hyperlinkRun.getAnchor().length() > 0) {
                    xhtml.startElement("a", "href", "#" + hyperlinkRun.getAnchor());
                    inHyperlink = true;
                }
            }
        } else if (inHyperlink) {
            //if this isn't a hyperlink, but the last one was
            // Keep the returned state, as every other call site does
            fmtg = closeStyleTags(xhtml, fmtg);
            xhtml.endElement("a");
            lastHyperlinkId = null;
            inHyperlink = false;
        }

        if (run instanceof XWPFSDT) {
            fmtg = closeStyleTags(xhtml, fmtg);
            processSDTRun((XWPFSDT) run, xhtml);
            //for now, we're ignoring formatting in sdt
            //if you hit an sdt reset to false
            fmtg.setBold(false);
            fmtg.setItalic(false);
        } else {
            fmtg = processRun((XWPFRun) run, paragraph, xhtml, fmtg);
        }
    }
    closeStyleTags(xhtml, fmtg);
    if (inHyperlink) {
        xhtml.endElement("a");
    }

    // Now do any comments for the paragraph
    XWPFCommentsDecorator comments = new XWPFCommentsDecorator(paragraph, null);
    String commentText = comments.getCommentText();
    if (commentText != null && commentText.length() > 0) {
        xhtml.characters(commentText);
    }

    String footnoteText = paragraph.getFootnoteText();
    if (footnoteText != null && footnoteText.length() > 0) {
        xhtml.characters(footnoteText + "\n");
    }

    // Also extract any paragraphs embedded in text boxes:
    if (config.getIncludeShapeBasedContent()) {
        for (XmlObject embeddedParagraph : paragraph.getCTP().selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' declare namespace wps='http://schemas.microsoft.com/office/word/2010/wordprocessingShape' .//*/wps:txbx/w:txbxContent/w:p")) {
            extractParagraph(new XWPFParagraph(CTP.Factory.parse(embeddedParagraph.xmlText()), paragraph.getBody()), listManager, xhtml);
        }
    }

    // Finish this paragraph
    xhtml.endElement(tag);

    // A section paragraph also carries that section's footers
    if (headerFooterPolicy != null) {
        extractFooters(xhtml, headerFooterPolicy, listManager);
    }
}
use of org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy in project poi by apache.
the class TestXWPFHeader method testImageInHeader.
/**
 * The default header of headerPic.docx must expose exactly one relation.
 */
@Test
public void testImageInHeader() throws IOException {
    XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("headerPic.docx");
    XWPFHeader defaultHeader = doc.getHeaderFooterPolicy().getDefaultHeader();

    assertNotNull(defaultHeader.getRelations());
    assertEquals(1, defaultHeader.getRelations().size());
}
use of org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy in project poi by apache.
the class TestXWPFHeader method testSetHeader.
/**
 * Creates default and first-page headers/footers on a document that
 * has none, checks their contents, then writes the document out,
 * reads it back, and checks the contents again.
 */
@Test
public void testSetHeader() throws IOException {
    XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");

    // Nothing is set on the sample yet
    XWPFHeaderFooterPolicy policy = doc.getHeaderFooterPolicy();
    assertNull(policy.getDefaultHeader());
    assertNull(policy.getFirstPageHeader());
    assertNull(policy.getDefaultFooter());
    assertNull(policy.getFirstPageFooter());

    // One paragraph destined for the default header
    String headerText = "Paragraph in header";
    CTP headerCtp = CTP.Factory.newInstance();
    headerCtp.addNewR().addNewT().setStringValue(headerText);

    // Two paragraphs destined for the default footer
    String footerText1 = "First paragraph for the footer";
    String footerText2 = "Second paragraph for the footer";
    CTP footerCtp1 = CTP.Factory.newInstance();
    footerCtp1.addNewR().addNewT().setStringValue(footerText1);
    CTP footerCtp2 = CTP.Factory.newInstance();
    footerCtp2.addNewR().addNewT().setStringValue(footerText2);

    XWPFParagraph[] headerParas = {
        new XWPFParagraph(headerCtp, doc)
    };
    XWPFParagraph[] footerParas = {
        new XWPFParagraph(footerCtp1, doc),
        new XWPFParagraph(footerCtp2, doc)
    };

    // Default variants get paragraphs, first-page variants start empty
    XWPFHeader defaultHeader = policy.createHeader(XWPFHeaderFooterPolicy.DEFAULT, headerParas);
    XWPFHeader firstHeader = policy.createHeader(XWPFHeaderFooterPolicy.FIRST);
    XWPFFooter defaultFooter = policy.createFooter(XWPFHeaderFooterPolicy.DEFAULT, footerParas);
    XWPFFooter firstFooter = policy.createFooter(XWPFHeaderFooterPolicy.FIRST);

    // All four must now be visible through the policy
    assertNotNull(policy.getDefaultHeader());
    assertNotNull(policy.getFirstPageHeader());
    assertNotNull(policy.getDefaultFooter());
    assertNotNull(policy.getFirstPageFooter());

    // Paragraph counts on the objects returned by the create calls
    assertEquals(2, defaultFooter.getParagraphs().size());
    assertEquals(0, firstFooter.getParagraphs().size());
    assertEquals(1, defaultHeader.getParagraphs().size());
    assertEquals(headerText, defaultHeader.getParagraphs().get(0).getText());
    assertEquals(0, firstHeader.getParagraphs().size());

    // The default footer fetched back from the policy holds both texts
    defaultFooter = policy.getDefaultFooter();
    assertEquals(2, defaultFooter.getParagraphs().size());
    assertEquals(footerText1, defaultFooter.getParagraphs().get(0).getText());
    assertEquals(footerText2, defaultFooter.getParagraphs().get(1).getText());

    // Fill the initially empty first-page header with two paragraphs
    String firstText1 = "New Text!";
    String firstText2 = "More Text!";
    firstHeader.createParagraph().insertNewRun(0).setText(firstText1);
    firstHeader.createParagraph().insertNewRun(0).setText(firstText2);

    assertEquals(headerText, defaultHeader.getParagraphs().get(0).getText());
    assertEquals(firstText1, firstHeader.getParagraphs().get(0).getText());
    assertEquals(firstText2, firstHeader.getParagraphs().get(1).getText());

    // Round-trip through write/read and verify everything again
    XWPFDocument roundTripped = XWPFTestDataSamples.writeOutAndReadBack(doc);
    XWPFHeaderFooterPolicy reloaded = roundTripped.getHeaderFooterPolicy();
    assertNotNull(reloaded.getDefaultHeader());
    assertNotNull(reloaded.getFirstPageHeader());
    assertNull(reloaded.getEvenPageHeader());
    assertNotNull(reloaded.getDefaultFooter());
    assertNotNull(reloaded.getFirstPageFooter());
    assertNull(reloaded.getEvenPageFooter());

    // Header text after the round trip
    XWPFHeader reloadedDefaultHeader = reloaded.getDefaultHeader();
    XWPFHeader reloadedFirstHeader = reloaded.getFirstPageHeader();
    assertEquals(headerText, reloadedDefaultHeader.getParagraphs().get(0).getText());
    assertEquals(firstText1, reloadedFirstHeader.getParagraphs().get(0).getText());
    assertEquals(firstText2, reloadedFirstHeader.getParagraphs().get(1).getText());

    // Footer text after the round trip
    XWPFFooter reloadedDefaultFooter = reloaded.getDefaultFooter();
    XWPFFooter reloadedFirstFooter = reloaded.getFirstPageFooter();
    assertEquals(2, reloadedDefaultFooter.getParagraphs().size());
    assertEquals(footerText1, reloadedDefaultFooter.getParagraphs().get(0).getText());
    assertEquals(footerText2, reloadedDefaultFooter.getParagraphs().get(1).getText());
    assertEquals(1, reloadedFirstFooter.getParagraphs().size());
}
use of org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy in project poi by apache.
the class TestXWPFHeader method testSimpleHeader.
/**
 * headerFooter.docx must provide both a default header and a default footer.
 */
@Test
public void testSimpleHeader() throws IOException {
    XWPFHeaderFooterPolicy hfPolicy =
            XWPFTestDataSamples.openSampleDocument("headerFooter.docx").getHeaderFooterPolicy();

    // Fetch both before asserting on either
    XWPFHeader defaultHeader = hfPolicy.getDefaultHeader();
    XWPFFooter defaultFooter = hfPolicy.getDefaultFooter();

    assertNotNull(defaultHeader);
    assertNotNull(defaultFooter);
}
Aggregations