use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method testWord6hwpfOld.
/**
 * Opens the {@code Word6.doc} sample and checks that its overall range
 * reports one section, one paragraph and one character run, and that the
 * single paragraph holds the expected sentence terminated by {@code \r}.
 *
 * @throws IOException if reading or closing the sample document fails
 */
@Test
public void testWord6hwpfOld() throws IOException {
    // Open
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Word6.doc");
    try {
        // Check
        assertEquals(1, doc.getRange().numSections());
        assertEquals(1, doc.getRange().numParagraphs());
        assertEquals(1, doc.getRange().numCharacterRuns());
        assertEquals("The quick brown fox jumps over the lazy dog\r", doc.getRange().getParagraph(0).text());
    } finally {
        // Close in finally so a failing assertion does not leak the open document.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method testDefaultCodePageEncoding.
/**
 * Extracts the text of the {@code Bug60942.doc} sample through a
 * {@link Word6Extractor} and checks that several expected strings are present.
 * <p>
 * NOTE(review): judging by the test name, these strings presumably only decode
 * correctly when the default code page is applied - confirm against bug 60942.
 *
 * @throws IOException if reading or closing the sample document fails
 */
@Test
public void testDefaultCodePageEncoding() throws IOException {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60942.doc");
    try {
        Word6Extractor ex = new Word6Extractor(doc);
        try {
            String txt = ex.getText();
            assertContains(txt, "BERTHOD");
            assertContains(txt, "APPLICOLOR");
            assertContains(txt, "les meilleurs");
            assertContains(txt, "GUY LECOLE");
        } finally {
            // Extractor is closed first, matching the original close order.
            ex.close();
        }
    } finally {
        // Runs even when an assertion or the extractor close fails.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestBugs method testBug51944.
/**
 * Bug 51944 - PAPFormattedDiskPage.getPAPX - IndexOutOfBounds
 * <p>
 * Converts the {@code Bug51944.doc} sample to text and checks that the
 * conversion yields a non-null result.
 *
 * @throws Exception if opening, converting or closing the document fails
 */
@Test
public void testBug51944() throws Exception {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
    try {
        assertNotNull(WordToTextConverter.getText(doc));
    } finally {
        // The document was previously never closed; release it regardless of outcome.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project tika by apache.
the class WordExtractor method parseWord6.
/**
 * Reads the document stored under {@code root} as an old-format Word file
 * and writes each paragraph returned by the extractor as a {@code <p>}
 * element on {@code xhtml}.
 *
 * NOTE(review): neither the document nor the extractor is closed here;
 * presumably the caller owns the filesystem behind {@code root} - confirm
 * before adding a close.
 *
 * @param root  directory node the old-format document is read from
 * @param xhtml handler that receives one {@code p} element per paragraph
 */
protected void parseWord6(DirectoryNode root, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    Word6Extractor word6Extractor = new Word6Extractor(new HWPFOldDocument(root));

    for (String paragraph : word6Extractor.getParagraphText()) {
        xhtml.element("p", paragraph);
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method testWord6Sections.
/**
 * Test a word document that has sections, as well as the usual paragraph
 * stuff.
 * <p>
 * Checks that {@code Word6_sections.doc} reports three sections and six
 * paragraphs, and verifies the text of each paragraph in order.
 *
 * @throws IOException if reading or closing the sample document fails
 */
@Test
public void testWord6Sections() throws IOException {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Word6_sections.doc");
    try {
        assertEquals(3, doc.getRange().numSections());
        assertEquals(6, doc.getRange().numParagraphs());
        assertEquals("This is a test.\r", doc.getRange().getParagraph(0).text());
        assertEquals("\r", doc.getRange().getParagraph(1).text());
        // Section / line?
        assertEquals("", doc.getRange().getParagraph(2).text());
        assertEquals("This is a new section.\r", doc.getRange().getParagraph(3).text());
        // Section / line?
        assertEquals("", doc.getRange().getParagraph(4).text());
        assertEquals("\r", doc.getRange().getParagraph(5).text());
    } finally {
        // Close in finally so a failing assertion does not leak the open document.
        doc.close();
    }
}
Aggregations