use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method testOldFontTableAltName.
/**
 * Checks that the old font table read from {@code Bug60942b.doc} holds five
 * entries and that entries 3 and 4 report "Roboto" as the main font name and
 * "arial" as the alternate font name.
 *
 * @throws IOException if the sample file cannot be opened or closed
 */
@Test
public void testOldFontTableAltName() throws IOException {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60942b.doc");
    try {
        OldFontTable oldFontTable = doc.getOldFontTable();
        assertEquals(5, oldFontTable.getFontNames().length);
        assertEquals("Roboto", oldFontTable.getFontNames()[3].getMainFontName());
        assertEquals("arial", oldFontTable.getFontNames()[3].getAltFontName());
        assertEquals("Roboto", oldFontTable.getFontNames()[4].getMainFontName());
        assertEquals("arial", oldFontTable.getFontNames()[4].getAltFontName());
    } finally {
        // Release the document even when one of the assertions above fails.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method test51944.
/**
 * Extracts the paragraph text of {@code Bug51944.doc} with a
 * {@link Word6Extractor}, collapses every run of CR/LF characters inside a
 * paragraph to a single {@code \n}, and checks that several expected
 * phrases are present in the concatenated result.
 *
 * @throws IOException if the sample file cannot be opened or closed
 */
@Test
public void test51944() throws IOException {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
    try {
        Word6Extractor ex = new Word6Extractor(doc);
        try {
            StringBuilder sb = new StringBuilder();
            for (String p : ex.getParagraphText()) {
                sb.append(p.replaceAll("[\r\n]+", "\n"));
            }
            String txt = sb.toString();
            assertContains(txt, "Post and Fax");
            //this is at a critical juncture
            assertContains(txt, "also maintain");
            //this too
            assertContains(txt, "which are available for");
            /*
            The bytes for the following test:
            170 : 78 : x
            171 : 0 :
            172 : d : <r>
            173 : 35 : 5
            174 : 39 : 9
            175 : 0 :
            176 : 2d : -
            177 : 0 :
            178 : 35 : 5
            179 : 0 :
            180 : 35 : 5
            Note that we are skipping over the value "5" at offset 173.
            This is an apparently invalid sequence in MS's encoding scheme
            When I open the document in MSWord, I also see "\r9-55"
            */
            assertContains(txt, "\n9-55 xxxxx block5");
            //TODO: figure out why these two aren't passing
            //assertContains(txt, "’x block2");//make sure smart quote is extracted correctly
            //assertContains(txt, "We are able to");//not sure if we can get this easily?
        } finally {
            // Close the extractor even when an assertion above fails.
            ex.close();
        }
    } finally {
        // Always release the document, whatever happened to the extractor.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestBugs method getTextOldFile.
/**
 * Opens the named old-format sample file and returns the text produced by
 * a {@link Word6Extractor} for it.
 *
 * @param samplefile name of the sample file, passed to
 *                   {@code HWPFTestDataSamples.openOldSampleFile}
 * @return the extracted text
 * @throws IOException if opening, reading or closing the document fails
 */
private String getTextOldFile(String samplefile) throws IOException {
    HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile(samplefile);
    try {
        Word6Extractor extractor = new Word6Extractor(doc);
        try {
            return extractor.getText();
        } finally {
            extractor.close();
        }
    } finally {
        // Runs even if constructing or closing the extractor throws, so the
        // document is never left open.
        doc.close();
    }
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class TestHWPFOldDocument method testCodePageBug60936.
@Test
public void testCodePageBug60936() throws IOException {
//windows 1250 -- this test file was generated with OpenOffice
//see https://bz.apache.org/ooo/show_bug.cgi?id=12445 for the inspiration
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug60936.doc");
Word6Extractor ex = new Word6Extractor(doc);
StringBuilder sb = new StringBuilder();
for (String p : ex.getParagraphText()) {
sb.append(p);
}
//Greetings!
assertContains(sb.toString(), "4 skóre a před 7 lety");
ex.close();
doc.close();
}
use of org.apache.poi.hwpf.HWPFOldDocument in project poi by apache.
the class HWPFLister method dumpStyles.
/**
 * Prints every non-null style description of the current document to
 * standard output, followed by the SPRMs found in its PAPX and CHPX data
 * when those are present. For an {@link HWPFOldDocument} only a
 * "not supported" notice is printed.
 */
private void dumpStyles() {
    if (_doc instanceof HWPFOldDocument) {
        System.out.println("Word 95 not supported so far");
        return;
    }

    HWPFDocument wordDocument = (HWPFDocument) _doc;
    for (int styleIndex = 0; styleIndex < wordDocument.getStyleSheet().numStyles(); styleIndex++) {
        StyleDescription style = wordDocument.getStyleSheet().getStyleDescription(styleIndex);
        // Slots without a style description are skipped.
        if (style != null) {
            String header = "=== Style #" + styleIndex + " '" + style.getName() + "' ===";
            System.out.println(header);
            System.out.println(style);

            // The PAPX iterator starts at offset 2, the CHPX iterator at offset 0.
            if (style.getPAPX() != null) {
                dumpSprms(new SprmIterator(style.getPAPX(), 2), "Style's PAP SPRM: ");
            }
            if (style.getCHPX() != null) {
                dumpSprms(new SprmIterator(style.getCHPX(), 0), "Style's CHP SPRM: ");
            }
        }
    }
}
Aggregations