Use of org.apache.poi.hwpf.usermodel.Range in the Apache POI project.
Class HWPFLister, method dumpParagraphsDom.
/**
 * Prints every paragraph of the document's overall range to standard
 * output, one line per paragraph, each prefixed with its index and a tab.
 *
 * @param withText when {@code true}, the text of each paragraph is printed
 *                 on the line following its summary
 */
public void dumpParagraphsDom(boolean withText) {
    final Range overall = _doc.getOverallRange();
    int index = 0;
    while (index < overall.numParagraphs()) {
        final Paragraph current = overall.getParagraph(index);
        System.out.println(index + ":\t" + current);
        if (withText) {
            System.out.println(current.text());
        }
        index++;
    }
}
Use of org.apache.poi.hwpf.usermodel.Range in the Apache POI project.
Class Word6Extractor, method getParagraphText.
/**
 * Get the text from the word file, as an array with one String
 * per paragraph.
 * <p>
 * The paragraphs are first extracted through the document's {@link Range}.
 * If that fails for any reason, the method falls back to returning the raw
 * text pieces, one array entry per piece, with every carriage return
 * expanded to a CR/LF pair.
 *
 * @return the extracted text: one entry per paragraph, or one entry per
 *         text piece when the fallback path is taken
 */
@Deprecated
public String[] getParagraphText() {
    String[] ret;
    // Extract using the model code
    try {
        Range r = doc.getRange();
        ret = WordExtractor.getParagraphText(r);
    } catch (Exception e) {
        // Deliberate best effort: something's up with turning the text
        // pieces into paragraphs, so fall back to ripping out the text pieces
        ret = new String[doc.getTextTable().getTextPieces().size()];
        for (int i = 0; i < ret.length; i++) {
            String piece = doc.getTextTable().getTextPieces().get(i).getStringBuilder().toString();
            // Fix the line endings: turn each bare CR into CR/LF in a single
            // literal replacement. The previous two-step version removed all
            // CRs and then called replaceAll with an empty regex, which
            // matches at every position and therefore inserted CR/LF around
            // every single character of the piece.
            ret[i] = piece.replace("\r", "\r\n");
        }
    }
    return ret;
}
Use of org.apache.poi.hwpf.usermodel.Range in the Apache POI project.
Class WordExtractor, method getParagraphText.
/**
 * Returns the text of the word file as an array holding one String per
 * paragraph.
 * <p>
 * If paragraph extraction through the document range throws, the result is
 * instead a single-element array containing the text assembled from the
 * text pieces.
 *
 * @return the paragraph texts, or a one-element array with the piece text
 *         when the fallback path is taken
 */
public String[] getParagraphText() {
    try {
        // Preferred path: extract using the model code
        final Range documentRange = doc.getRange();
        return getParagraphText(documentRange);
    } catch (Exception e) {
        // Something's up with turning the text pieces into paragraphs;
        // fall back to ripping out the text pieces as one block
        return new String[] { getTextFromPieces() };
    }
}
Use of org.apache.poi.hwpf.usermodel.Range in the Apache POI project.
Class HWPFLister, method dumpChpx.
/**
 * Prints every CHPX of the document's character table to standard output,
 * followed by the text it covers.
 * <p>
 * In the printed text, each character whose code is below 30 is written as
 * {@code \0x} followed by its hexadecimal value; all other characters are
 * printed unchanged.
 *
 * @param withProperties when {@code true}, also prints the character
 *                       properties resolved against the document's style
 *                       sheet
 * @param withSprms      when {@code true}, also prints each SPRM operation
 *                       found in the CHPX's grpprl, one per tab-indented line
 */
public void dumpChpx(boolean withProperties, boolean withSprms) {
    for (CHPX run : _doc.getCharacterTable().getTextRuns()) {
        System.out.println(run);

        if (withProperties) {
            System.out.println(run.getCharacterProperties(_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE));
        }

        if (withSprms) {
            for (SprmIterator it = new SprmIterator(run.getGrpprl(), 0); it.hasNext();) {
                SprmOperation operation = it.next();
                System.out.println("\t" + operation);
            }
        }

        // Anonymous subclass only customises the range's string form.
        Range runRange = new Range(run.getStart(), run.getEnd(), _doc.getOverallRange()) {
            @Override
            public String toString() {
                return "CHPX range (" + super.toString() + ")";
            }
        };
        String runText = runRange.text();

        // Escape low character codes so they are visible in the dump.
        StringBuilder escaped = new StringBuilder();
        for (int i = 0; i < runText.length(); i++) {
            char ch = runText.charAt(i);
            if (ch >= 30) {
                escaped.append(ch);
            } else {
                escaped.append("\\0x").append(Integer.toHexString(ch));
            }
        }
        System.out.println(escaped);
    }
}
Use of org.apache.poi.hwpf.usermodel.Range in the Apache POI project.
Class PicturesTable, method getAllPictures.
/**
 * Collects the pictures of the current document.
 * <p>
 * Not all documents have all the images concatenated in the data stream,
 * although MS claims so, so every character run of the overall range is
 * scanned for a picture. The Escher records are then searched as well and
 * may contribute further entries to the same list.
 *
 * @return a list of Picture objects found in current document
 */
public List<Picture> getAllPictures() {
    ArrayList<Picture> found = new ArrayList<Picture>();
    Range overall = _document.getOverallRange();
    for (int runIndex = 0; runIndex < overall.numCharacterRuns(); runIndex++) {
        CharacterRun candidate = overall.getCharacterRun(runIndex);
        if (candidate != null) {
            Picture extracted = extractPicture(candidate, false);
            if (extracted != null) {
                found.add(extracted);
            }
        }
    }
    searchForPictures(_dgg.getEscherRecords(), found);
    return found;
}
Aggregations