use of org.apache.poi.hslf.usermodel.HSLFTextParagraph in project poi by apache.
the class TestHyperlink method testTextRunHyperlinks.
/**
 * Reads {@code WithLinks.ppt} and checks the hyperlinks found in the second
 * text block of the first two slides: their count, label, address and the
 * text range they cover.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
@Test
public void testTextRunHyperlinks() throws Exception {
    HSLFSlideShow ppt = new HSLFSlideShow(_slTests.openResourceAsStream("WithLinks.ppt"));
    // try/finally so the slide show is released even when an assertion fails
    try {
        // --- slide 1: two plain URL links ---
        HSLFSlide slide = ppt.getSlides().get(0);
        List<HSLFTextParagraph> para = slide.getTextParagraphs().get(1);
        String rawText = toExternalString(getRawText(para), para.get(0).getRunType());
        String expected = "This page has two links:\n" + "http://jakarta.apache.org/poi/\n" + "\n" + "http://slashdot.org/\n" + "\n" + "In addition, its notes has one link";
        assertEquals(expected, rawText);

        List<HSLFHyperlink> links = findHyperlinks(para);
        assertEquals(2, links.size());

        HSLFHyperlink first = links.get(0);
        assertEquals("http://jakarta.apache.org/poi/", first.getLabel());
        assertEquals("http://jakarta.apache.org/poi/", first.getAddress());
        // the test expects the linked text to stop one character before getEndIndex()
        assertEquals("http://jakarta.apache.org/poi/", rawText.substring(first.getStartIndex(), first.getEndIndex() - 1));

        HSLFHyperlink second = links.get(1);
        assertEquals("http://slashdot.org/", second.getLabel());
        assertEquals("http://slashdot.org/", second.getAddress());
        assertEquals("http://slashdot.org/", rawText.substring(second.getStartIndex(), second.getEndIndex() - 1));

        // --- slide 2: one link whose label differs from the displayed text ---
        slide = ppt.getSlides().get(1);
        para = slide.getTextParagraphs().get(1);
        rawText = toExternalString(getRawText(para), para.get(0).getRunType());
        expected = "I have the one link:\n" + "Jakarta HSSF";
        assertEquals(expected, rawText);

        // discard the first slide's results before collecting the second slide's links
        links.clear();
        links = findHyperlinks(para);
        assertNotNull(links);
        assertEquals(1, links.size());

        HSLFHyperlink only = links.get(0);
        assertEquals("Open Jakarta POI HSSF module test ", only.getLabel());
        assertEquals("http://jakarta.apache.org/poi/hssf/", only.getAddress());
        assertEquals("Jakarta HSSF", rawText.substring(only.getStartIndex(), only.getEndIndex() - 1));
    } finally {
        ppt.close();
    }
}
use of org.apache.poi.hslf.usermodel.HSLFTextParagraph in project tika by apache.
the class HSLFExtractor method textRunsToText.
/**
 * Writes the text of the given paragraph groups to the XHTML handler.
 * <p>
 * Consecutive bulleted paragraphs are wrapped in a {@code ul} element; each
 * paragraph becomes an {@code li} (bulleted and non-empty) or a {@code p}.
 * A run carrying a hyperlink whose address is non-null and does not start
 * with {@code "_ftn"} is wrapped in an {@code a href} element. Vertical tab
 * characters (U+000B) inside a run are emitted as {@code br} elements.
 *
 * @param xhtml          handler receiving the generated elements and text
 * @param paragraphsList groups of paragraphs to write; {@code null} is a no-op
 * @throws SAXException if the handler rejects an event
 */
private void textRunsToText(XHTMLContentHandler xhtml, List<List<HSLFTextParagraph>> paragraphsList) throws SAXException {
    if (paragraphsList == null) {
        return;
    }
    for (List<HSLFTextParagraph> run : paragraphsList) {
        // Leaving in wisdom from TIKA-712 for easy revert.
        // Avoid boiler-plate text on the master slide (0
        // = TextHeaderAtom.TITLE_TYPE, 1 = TextHeaderAtom.BODY_TYPE):
        //if (!isMaster || (run.getRunType() != 0 && run.getRunType() != 1)) {
        boolean isBullet = false;
        for (HSLFTextParagraph htp : run) {
            boolean nextBullet = htp.isBullet();
            // TODO: identify bullet/list type
            // open or close the list whenever the bullet state flips
            if (isBullet != nextBullet) {
                isBullet = nextBullet;
                if (isBullet) {
                    xhtml.startElement("ul");
                } else {
                    xhtml.endElement("ul");
                }
            }
            List<HSLFTextRun> textRuns = htp.getTextRuns();
            String firstLine = removePBreak(textRuns.get(0).getRawText());
            // a bulleted paragraph consisting of a single empty run is written as a plain <p>
            boolean showBullet = (isBullet && (textRuns.size() > 1 || !"".equals(firstLine)));
            String paraTag = showBullet ? "li" : "p";
            xhtml.startElement(paraTag);
            boolean runIsHyperLink = false;
            for (HSLFTextRun htr : textRuns) {
                Hyperlink link = htr.getHyperlink();
                if (link != null) {
                    String address = link.getAddress();
                    if (address != null && !address.startsWith("_ftn")) {
                        xhtml.startElement("a", "href", address);
                        runIsHyperLink = true;
                    }
                }
                String line = htr.getRawText();
                if (line != null) {
                    boolean isfirst = true;
                    // a vertical tab inside the run becomes a <br/> between fragments
                    for (String fragment : line.split("\\u000b")) {
                        if (!isfirst) {
                            xhtml.startElement("br");
                            xhtml.endElement("br");
                        }
                        isfirst = false;
                        xhtml.characters(removePBreak(fragment));
                    }
                    // String.split drops trailing empty fragments, so a vertical tab
                    // at the very end of the run needs its <br/> emitted explicitly.
                    // (Testing endsWith("") would be true for every run.)
                    if (line.endsWith("\u000b")) {
                        xhtml.startElement("br");
                        xhtml.endElement("br");
                    }
                }
                if (runIsHyperLink) {
                    xhtml.endElement("a");
                }
                runIsHyperLink = false;
            }
            xhtml.endElement(paraTag);
        }
        // close a list that is still open at the end of the group
        if (isBullet) {
            xhtml.endElement("ul");
        }
    }
}
use of org.apache.poi.hslf.usermodel.HSLFTextParagraph in project poi by apache.
the class Hyperlinks method main.
/**
 * Prints the hyperlinks found in each PowerPoint file named on the command
 * line: first those attached to text runs, then those attached to shapes.
 *
 * @param args paths of the files to inspect
 * @throws Exception if a file cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {
    for (String path : args) {
        HSLFSlideShow ppt;
        FileInputStream is = new FileInputStream(path);
        // close the stream even if the slide show cannot be constructed
        try {
            ppt = new HSLFSlideShow(is);
        } finally {
            is.close();
        }
        // release the slide show even if printing fails part-way through
        try {
            for (HSLFSlide slide : ppt.getSlides()) {
                System.out.println("\nslide " + slide.getSlideNumber());
                // read hyperlinks from the slide's text runs
                System.out.println("- reading hyperlinks from the text runs");
                for (List<HSLFTextParagraph> paras : slide.getTextParagraphs()) {
                    for (HSLFTextParagraph para : paras) {
                        for (HSLFTextRun run : para) {
                            HSLFHyperlink link = run.getHyperlink();
                            if (link != null) {
                                System.out.println(toStr(link, run.getRawText()));
                            }
                        }
                    }
                }
                // in PowerPoint you can assign a hyperlink to a shape without text,
                // for example to a Line object. The code below demonstrates how to
                // read such hyperlinks
                System.out.println("- reading hyperlinks from the slide's shapes");
                for (HSLFShape sh : slide.getShapes()) {
                    if (sh instanceof HSLFSimpleShape) {
                        HSLFHyperlink link = ((HSLFSimpleShape) sh).getHyperlink();
                        if (link != null) {
                            System.out.println(toStr(link, null));
                        }
                    }
                }
            }
        } finally {
            ppt.close();
        }
    }
}
use of org.apache.poi.hslf.usermodel.HSLFTextParagraph in project poi by apache.
the class TestShapes method textBoxSet.
/**
 * Checks that, for every slide of the given sample file, the run texts
 * reachable through the slide's text paragraphs and the run texts reachable
 * through its text shapes contain each other.
 *
 * @param filename name of the sample presentation to open
 * @throws IOException if the file cannot be read
 */
private void textBoxSet(String filename) throws IOException {
    HSLFSlideShow ss = new HSLFSlideShow(_slTests.openResourceAsStream(filename));
    // try/finally so the slide show is released even when an assertion fails
    try {
        for (HSLFSlide sld : ss.getSlides()) {
            // run texts collected via the slide-level paragraph lists
            List<String> lst1 = new ArrayList<String>();
            for (List<HSLFTextParagraph> txt : sld.getTextParagraphs()) {
                for (HSLFTextParagraph p : txt) {
                    for (HSLFTextRun r : p) {
                        lst1.add(r.getRawText());
                    }
                }
            }
            // run texts collected via the individual text shapes
            List<String> lst2 = new ArrayList<String>();
            for (HSLFShape sh : sld.getShapes()) {
                if (sh instanceof HSLFTextShape) {
                    HSLFTextShape tbox = (HSLFTextShape) sh;
                    for (HSLFTextParagraph p : tbox.getTextParagraphs()) {
                        for (HSLFTextRun r : p) {
                            lst2.add(r.getRawText());
                        }
                    }
                }
            }
            // mutual containment: same set of texts, order and multiplicity ignored
            assertTrue(lst1.containsAll(lst2));
            assertTrue(lst2.containsAll(lst1));
        }
    } finally {
        ss.close();
    }
}
use of org.apache.poi.hslf.usermodel.HSLFTextParagraph in project poi by apache.
the class TestShapes method testParagraphs.
/**
 * Builds a text box with two paragraphs made of differently formatted runs,
 * writes the slide show to memory, reads it back and checks the text, colour
 * and strikethrough of the runs that come back.
 *
 * @throws IOException if writing or re-reading the slide show fails
 */
@SuppressWarnings("unused")
@Test
public void testParagraphs() throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();

    HSLFSlideShow ss = new HSLFSlideShow();
    // the freshly created slide show is closed once it has been serialised
    try {
        HSLFSlide slide = ss.createSlide();
        HSLFTextBox shape = new HSLFTextBox();
        HSLFTextRun p1r1 = shape.setText("para 1 run 1. ");
        HSLFTextRun p1r2 = shape.appendText("para 1 run 2.", false);
        HSLFTextRun p2r1 = shape.appendText("para 2 run 1. ", true);
        HSLFTextRun p2r2 = shape.appendText("para 2 run 2. ", false);
        p1r1.setFontColor(Color.black);
        p1r2.setFontColor(Color.red);
        p2r1.setFontColor(Color.yellow);
        p2r2.setStrikethrough(true);
        // run 3 has same text properties as run 2 and will be merged when saving
        HSLFTextRun p2r3 = shape.appendText("para 2 run 3.", false);
        shape.setAnchor(new Rectangle2D.Double(100, 100, 100, 10));
        slide.addShape(shape);
        shape.resizeToFitText();
        ss.write(bos);
    } finally {
        ss.close();
    }

    HSLFSlideShow reloaded = new HSLFSlideShow(new ByteArrayInputStream(bos.toByteArray()));
    // try/finally so the re-read slide show is released even when an assertion fails
    try {
        HSLFSlide slide = reloaded.getSlides().get(0);
        HSLFTextBox tb = (HSLFTextBox) slide.getShapes().get(0);
        List<HSLFTextParagraph> para = tb.getTextParagraphs();

        HSLFTextRun tr = para.get(0).getTextRuns().get(0);
        assertEquals("para 1 run 1. ", tr.getRawText());
        assertTrue(sameColor(Color.black, tr.getFontColor()));

        tr = para.get(0).getTextRuns().get(1);
        // the last run of the first paragraph is expected to end with a carriage return
        assertEquals("para 1 run 2.\r", tr.getRawText());
        assertTrue(sameColor(Color.red, tr.getFontColor()));

        tr = para.get(1).getTextRuns().get(0);
        assertEquals("para 2 run 1. ", tr.getRawText());
        assertTrue(sameColor(Color.yellow, tr.getFontColor()));

        tr = para.get(1).getTextRuns().get(1);
        // runs 2 and 3 of the second paragraph are expected to come back as one run
        assertEquals("para 2 run 2. para 2 run 3.", tr.getRawText());
        assertTrue(sameColor(Color.black, tr.getFontColor()));
        assertTrue(tr.isStrikethrough());
    } finally {
        reloaded.close();
    }
}
Aggregations