Use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.
Class HWPFLister, method dumpPapx.
/**
 * Dumps the paragraph property exceptions (PAPX) of the document to standard output.
 *
 * <p>When the document is an {@code HWPFDocument}, the PAPX bin table is read first:
 * each formatted disk page it references is printed together with the PAPXs on it,
 * and then every PAPX collected that way is printed a second time after sorting.
 * Afterwards, for any document type, the PAPXs of the paragraph table are printed.
 *
 * @param withProperties if {@code true}, also print the properties of the paragraph
 *        built from each paragraph-table PAPX
 * @param withSprms if {@code true}, also print the SPRMs of every PAPX that is listed
 * @throws Exception declared by the signature; nothing is thrown explicitly here
 */
public void dumpPapx(boolean withProperties, boolean withSprms) throws Exception {
    if (_doc instanceof HWPFDocument) {
        System.out.println("binary PAP pages ");
        HWPFDocument doc = (HWPFDocument) _doc;
        byte[] mainStream = _doc.getMainStream();
        PlexOfCps binTable = new PlexOfCps(doc.getTableStream(),
                doc.getFileInformationBlock().getFcPlcfbtePapx(),
                doc.getFileInformationBlock().getLcbPlcfbtePapx(), 4);

        List<PAPX> papxs = new ArrayList<PAPX>();
        int length = binTable.length();
        for (int x = 0; x < length; x++) {
            GenericPropertyNode node = binTable.getProperty(x);
            // Each bin table entry holds a page number; the page lives at
            // pageNum * block size inside the main stream.
            int pageNum = LittleEndian.getInt(node.getBytes());
            int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
            PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(mainStream,
                    doc.getDataStream(), pageOffset, doc.getTextTable());
            System.out.println("* PFKP: " + pfkp);

            for (PAPX papx : pfkp.getPAPXs()) {
                System.out.println("** " + papx);
                papxs.add(papx);
                if (papx != null && withSprms) {
                    SprmIterator sprmIt = new SprmIterator(papx.getGrpprl(), 2);
                    dumpSprms(sprmIt, "*** ");
                }
            }
        }

        // Second pass over the same PAPXs, this time in their natural sort order.
        Collections.sort(papxs);
        System.out.println("* Sorted by END");
        for (PAPX papx : papxs) {
            System.out.println("** " + papx);
            if (papx != null && withSprms) {
                SprmIterator sprmIt = new SprmIterator(papx.getGrpprl(), 2);
                dumpSprms(sprmIt, "*** ");
            }
        }
    }

    for (PAPX papx : _doc.getParagraphTable().getParagraphs()) {
        System.out.println(papx);
        if (withProperties) {
            Paragraph paragraph = Paragraph.newParagraph(_doc.getOverallRange(), papx);
            System.out.println(paragraph.getProps());
        }
        // Honour withSprms here as well; previously the SPRMs of the paragraph
        // table were dumped unconditionally, unlike the two loops above.
        if (withSprms) {
            SprmIterator sprmIt = new SprmIterator(papx.getGrpprl(), 2);
            dumpSprms(sprmIt, "\t");
        }
    }
}
Use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.
Class HWPFLister, method dumpParagraphLevels.
/**
 * Prints the list formatting information attached to a paragraph to standard output.
 *
 * <p>Nothing is printed when the paragraph's {@code ilfo} is 0. Otherwise the LFO and
 * LFOData looked up by that {@code ilfo} are printed and, when the LFO exists, the
 * list level selected by the LFO's lsid and the paragraph's {@code ilvl}, followed by
 * the SPRMs of that level's PAPX and CHPX grpprls (each only when present).
 *
 * @param listTables list tables used to resolve the LFO, LFOData and list level
 * @param paragraph properties of the paragraph whose list information is dumped
 */
protected void dumpParagraphLevels(ListTables listTables, ParagraphProperties paragraph) {
    if (paragraph.getIlfo() == 0) {
        return;
    }

    final LFO lfo = listTables.getLfo(paragraph.getIlfo());
    System.out.println("PAP's LFO: " + lfo);

    final LFOData lfoData = listTables.getLfoData(paragraph.getIlfo());
    System.out.println("PAP's LFOData: " + lfoData);

    if (lfo == null) {
        return;
    }

    final ListLevel listLevel = listTables.getLevel(lfo.getLsid(), paragraph.getIlvl());
    System.out.println("PAP's ListLevel: " + listLevel);
    // NOTE(review): getLevel presumably may return null for an unknown list or level;
    // guard so the dump does not fail with a NullPointerException - confirm.
    if (listLevel == null) {
        return;
    }

    if (listLevel.getGrpprlPapx() != null) {
        System.out.println("PAP's ListLevel PAPX:");
        dumpSprms(new SprmIterator(listLevel.getGrpprlPapx(), 0), "* ");
    }

    // The guard must test the CHPX grpprl, since that is the array dumped below;
    // the previous code tested the PAPX grpprl a second time.
    if (listLevel.getGrpprlChpx() != null) {
        System.out.println("PAP's ListLevel CHPX:");
        dumpSprms(new SprmIterator(listLevel.getGrpprlChpx(), 0), "* ");
    }
}
Use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.
Class CHPBinTable, method rebuild.
/**
 * Rebuilds {@code _textRuns} so that it holds one CHPX per consecutive range
 * delimited by the start and end positions of the CHPXs currently in the list.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>if {@code complexFileTable} is not null, add a CHPX for every text piece whose
 * complex PRM points to a grpprl containing at least one CHP-type SPRM;</li>
 * <li>sort a copy of the CHPXs by start position and remember each CHPX's index in
 * {@code _textRuns};</li>
 * <li>collect all distinct start/end positions as boundaries;</li>
 * <li>for each range between two consecutive boundaries, reuse the single matching
 * CHPX, merge the SPRMs of all overlapping CHPXs, or create an empty CHPX;</li>
 * <li>merge neighbouring CHPXs that touch and have equal grpprls.</li>
 * </ol>
 * Timing of every step is logged at DEBUG level.
 *
 * @param complexFileTable source of additional grpprls and text pieces; when
 *        {@code null} the first step is skipped
 */
public void rebuild(ComplexFileTable complexFileTable) {
long start = System.currentTimeMillis();
if (complexFileTable != null) {
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
// only complex PRMs carry an index into sprmBuffers
if (!prm.isComplex())
continue;
int igrpprl = prm.getIgrpprl();
// an index outside sprmBuffers is reported and the text piece is skipped
if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
continue;
}
// look for at least one SPRM of type CHP in the referenced buffer
boolean hasChp = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
SprmOperation sprmOperation = iterator.next();
if (sprmOperation.getType() == SprmOperation.TYPE_CHP) {
hasChp = true;
break;
}
}
if (hasChp) {
// the new CHPX covers the text piece's range and gets its own clone of the buffer
SprmBuffer newSprmBuffer = sprmBuffer.clone();
CHPX chpx = new CHPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
_textRuns.add(chpx);
}
}
logger.log(POILogger.DEBUG, "Merged with CHPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements in total)");
start = System.currentTimeMillis();
}
// sorted copy; _textRuns itself keeps its current order
List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>(_textRuns);
Collections.sort(oldChpxSortedByStartPos, PropertyNode.StartComparator.instance);
logger.log(POILogger.DEBUG, "CHPX sorted by start position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
// identity-keyed map from each CHPX to its index in _textRuns, used below to
// put overlapping CHPXs back into that order before their SPRMs are merged
final Map<CHPX, Integer> chpxToFileOrder = new IdentityHashMap<CHPX, Integer>();
{
int counter = 0;
for (CHPX chpx : _textRuns) {
chpxToFileOrder.put(chpx, Integer.valueOf(counter++));
}
}
final Comparator<CHPX> chpxFileOrderComparator = new Comparator<CHPX>() {
public int compare(CHPX o1, CHPX o2) {
Integer i1 = chpxToFileOrder.get(o1);
Integer i2 = chpxToFileOrder.get(o2);
return i1.compareTo(i2);
}
};
logger.log(POILogger.DEBUG, "CHPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
// sorted list of all distinct CHPX start and end positions
List<Integer> textRunsBoundariesList;
{
Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
for (CHPX chpx : _textRuns) {
textRunsBoundariesSet.add(Integer.valueOf(chpx.getStart()));
textRunsBoundariesSet.add(Integer.valueOf(chpx.getEnd()));
}
// 0 is dropped because the first range below already starts at 0
textRunsBoundariesSet.remove(Integer.valueOf(0));
textRunsBoundariesList = new ArrayList<Integer>(textRunsBoundariesSet);
Collections.sort(textRunsBoundariesList);
}
logger.log(POILogger.DEBUG, "Texts CHPX boundaries collected in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
List<CHPX> newChpxs = new LinkedList<CHPX>();
int lastTextRunStart = 0;
// each iteration handles the range [lastTextRunStart; boundary)
for (Integer objBoundary : textRunsBoundariesList) {
final int boundary = objBoundary.intValue();
final int startInclusive = lastTextRunStart;
final int endExclusive = boundary;
lastTextRunStart = endExclusive;
// binarySearch is defined outside this block; presumably it returns a negative
// value when there is no exact match, hence Math.abs - TODO confirm
int startPosition = binarySearch(oldChpxSortedByStartPos, boundary);
startPosition = Math.abs(startPosition);
// clamp the index into the list, then step back to a CHPX that starts before
// the boundary (or to index 0)
while (startPosition >= oldChpxSortedByStartPos.size()) startPosition--;
while (startPosition > 0 && oldChpxSortedByStartPos.get(startPosition).getStart() >= boundary) startPosition--;
// CHPXs overlapping the current range
List<CHPX> chpxs = new LinkedList<CHPX>();
for (int c = startPosition; c < oldChpxSortedByStartPos.size(); c++) {
CHPX chpx = oldChpxSortedByStartPos.get(c);
// the list is sorted by start, so no later CHPX can overlap either
if (boundary < chpx.getStart())
break;
// non-empty intersection of the CHPX with [startInclusive; endExclusive)
int left = Math.max(startInclusive, chpx.getStart());
int right = Math.min(endExclusive, chpx.getEnd());
if (left < right) {
chpxs.add(chpx);
}
}
if (chpxs.size() == 0) {
logger.log(POILogger.WARN, "Text piece [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no CHPX. Creating new one.");
// create it manually
CHPX chpx = new CHPX(startInclusive, endExclusive, new SprmBuffer(0));
newChpxs.add(chpx);
continue;
}
if (chpxs.size() == 1) {
// can we reuse existing?
CHPX existing = chpxs.get(0);
if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
newChpxs.add(existing);
continue;
}
}
// several CHPXs (or one with a different range): restore the _textRuns order
// and concatenate their grpprls into one new buffer
Collections.sort(chpxs, chpxFileOrderComparator);
SprmBuffer sprmBuffer = new SprmBuffer(0);
for (CHPX chpx : chpxs) {
sprmBuffer.append(chpx.getGrpprl(), 0);
}
CHPX newChpx = new CHPX(startInclusive, endExclusive, sprmBuffer);
newChpxs.add(newChpx);
continue;
}
this._textRuns = new ArrayList<CHPX>(newChpxs);
logger.log(POILogger.DEBUG, "CHPX rebuilded in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
start = System.currentTimeMillis();
// compaction: fold a CHPX into its predecessor when the two ranges touch and
// their grpprls are equal
CHPX previous = null;
for (Iterator<CHPX> iterator = _textRuns.iterator(); iterator.hasNext(); ) {
CHPX current = iterator.next();
if (previous == null) {
previous = current;
continue;
}
if (previous.getEnd() == current.getStart() && Arrays.equals(previous.getGrpprl(), current.getGrpprl())) {
// previous is kept (and extended) so that it can absorb further neighbours
previous.setEnd(current.getEnd());
iterator.remove();
continue;
}
previous = current;
}
logger.log(POILogger.DEBUG, "CHPX compacted in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(_textRuns.size()), " elements)");
}
Use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.
Class PAPBinTable, method rebuild.
/**
 * Rebuilds {@code paragraphs} in place so that it holds one PAPX per paragraph
 * found in {@code docText}. A paragraph ends at (and includes) each character
 * with code 13, 7 or 12.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>if {@code complexFileTable} is not null, add a PAPX for every text piece whose
 * complex PRM points to a grpprl containing at least one PAP-type SPRM;</li>
 * <li>sort a copy of the PAPXs by end position and remember each PAPX's index in
 * {@code paragraphs};</li>
 * <li>scan the text and, for each paragraph, reuse the single matching PAPX, merge
 * the SPRMs of all PAPXs ending within it, or create an empty PAPX;</li>
 * <li>replace the content of {@code paragraphs} with the new list.</li>
 * </ol>
 * Timing of every step is logged at DEBUG level.
 *
 * @param docText document text that is scanned for paragraph end characters
 * @param complexFileTable source of additional grpprls and text pieces; when
 *        {@code null} the first step is skipped
 * @param paragraphs input PAPXs; cleared and refilled with the rebuilt PAPXs
 */
static void rebuild(final StringBuilder docText, ComplexFileTable complexFileTable, List<PAPX> paragraphs) {
long start = System.currentTimeMillis();
if (complexFileTable != null) {
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding PAPX from fast-saved SPRMs
for (TextPiece textPiece : complexFileTable.getTextPieceTable().getTextPieces()) {
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
// only complex PRMs carry an index into sprmBuffers
if (!prm.isComplex())
continue;
int igrpprl = prm.getIgrpprl();
// an index outside sprmBuffers is reported and the text piece is skipped
if (igrpprl < 0 || igrpprl >= sprmBuffers.length) {
logger.log(POILogger.WARN, textPiece + "'s PRM references to unknown grpprl");
continue;
}
// look for at least one SPRM of type PAP in the referenced buffer
boolean hasPap = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for (SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); ) {
SprmOperation sprmOperation = iterator.next();
if (sprmOperation.getType() == SprmOperation.TYPE_PAP) {
hasPap = true;
break;
}
}
if (hasPap) {
// a fresh buffer created with argument 2 receives all bytes of the source buffer;
// presumably the 2 accounts for the 2-byte prefix skipped elsewhere via offset 2
// - TODO confirm
SprmBuffer newSprmBuffer = new SprmBuffer(2);
newSprmBuffer.append(sprmBuffer.toByteArray());
PAPX papx = new PAPX(textPiece.getStart(), textPiece.getEnd(), newSprmBuffer);
paragraphs.add(papx);
}
}
logger.log(POILogger.DEBUG, "Merged (?) with PAPX from complex file table in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements in total)");
start = System.currentTimeMillis();
}
// sorted copy; the paragraphs list itself keeps its current order
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>(paragraphs);
Collections.sort(oldPapxSortedByEndPos, PropertyNode.EndComparator.instance);
logger.log(POILogger.DEBUG, "PAPX sorted by end position in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
// identity-keyed map from each PAPX to its index in paragraphs, used below to
// put the PAPXs of one paragraph back into that order before merging
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
{
int counter = 0;
for (PAPX papx : paragraphs) {
papxToFileOrder.put(papx, Integer.valueOf(counter++));
}
}
final Comparator<PAPX> papxFileOrderComparator = new Comparator<PAPX>() {
public int compare(PAPX o1, PAPX o2) {
Integer i1 = papxToFileOrder.get(o1);
Integer i2 = papxToFileOrder.get(o2);
return i1.compareTo(i2);
}
};
logger.log(POILogger.DEBUG, "PAPX's order map created in ", Long.valueOf(System.currentTimeMillis() - start), " ms");
start = System.currentTimeMillis();
List<PAPX> newPapxs = new LinkedList<PAPX>();
// start of the paragraph currently being scanned
int lastParStart = 0;
// index in oldPapxSortedByEndPos at which the next paragraph resumes its scan
int lastPapxIndex = 0;
for (int charIndex = 0; charIndex < docText.length(); charIndex++) {
final char c = docText.charAt(charIndex);
// only characters 13, 7 and 12 end a paragraph; everything else is skipped
if (c != 13 && c != 7 && c != 12)
continue;
final int startInclusive = lastParStart;
final int endExclusive = charIndex + 1;
// true when the scan below stopped at a PAPX that ends after this paragraph
boolean broken = false;
// PAPXs whose end lies at or before the end of this paragraph
List<PAPX> papxs = new LinkedList<PAPX>();
for (int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos.size(); papxIndex++) {
broken = false;
PAPX papx = oldPapxSortedByEndPos.get(papxIndex);
assert startInclusive == 0 || papxIndex + 1 == oldPapxSortedByEndPos.size() || papx.getEnd() > startInclusive;
// first PAPX ending beyond this paragraph: remember it for the next paragraph
if (papx.getEnd() - 1 > charIndex) {
lastPapxIndex = papxIndex;
broken = true;
break;
}
papxs.add(papx);
}
// the scan ran off the end of the list: later paragraphs resume at the last element
if (!broken) {
lastPapxIndex = oldPapxSortedByEndPos.size() - 1;
}
if (papxs.size() == 0) {
logger.log(POILogger.WARN, "Paragraph [", Integer.valueOf(startInclusive), "; ", Integer.valueOf(endExclusive), ") has no PAPX. Creating new one.");
// create it manually
PAPX papx = new PAPX(startInclusive, endExclusive, new SprmBuffer(2));
newPapxs.add(papx);
lastParStart = endExclusive;
continue;
}
if (papxs.size() == 1) {
// can we reuse existing?
PAPX existing = papxs.get(0);
if (existing.getStart() == startInclusive && existing.getEnd() == endExclusive) {
newPapxs.add(existing);
lastParStart = endExclusive;
continue;
}
}
// restore file order of PAPX
Collections.sort(papxs, papxFileOrderComparator);
SprmBuffer sprmBuffer = null;
for (PAPX papx : papxs) {
// PAPXs with no grpprl, or with one of at most 2 bytes, contribute nothing
if (papx.getGrpprl() == null || papx.getGrpprl().length <= 2)
continue;
// the first contributing PAPX supplies a clone of its whole buffer; later ones
// are appended with offset argument 2
if (sprmBuffer == null) {
sprmBuffer = papx.getSprmBuf().clone();
} else {
sprmBuffer.append(papx.getGrpprl(), 2);
}
}
// NOTE(review): sprmBuffer is still null here when every PAPX was skipped above,
// so the PAPX is created with a null buffer - confirm this is acceptable
PAPX newPapx = new PAPX(startInclusive, endExclusive, sprmBuffer);
newPapxs.add(newPapx);
lastParStart = endExclusive;
continue;
}
// hand the result back through the caller's list
paragraphs.clear();
paragraphs.addAll(newPapxs);
logger.log(POILogger.DEBUG, "PAPX rebuilded from document text in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(paragraphs.size()), " elements)");
// NOTE(review): this value of start is not read again within this method
start = System.currentTimeMillis();
}
Use of org.apache.poi.hwpf.sprm.SprmIterator in project poi by apache.
Class HWPFLister, method dumpStyles.
/**
 * Prints every style of the document's style sheet to standard output.
 *
 * <p>For an {@code HWPFOldDocument} only a "not supported" message is printed.
 * Otherwise each non-null style description is printed with a header line,
 * followed by the SPRMs of its PAPX and of its CHPX when those are present.
 */
private void dumpStyles() {
    if (_doc instanceof HWPFOldDocument) {
        System.out.println("Word 95 not supported so far");
        return;
    }

    final HWPFDocument binaryDoc = (HWPFDocument) _doc;
    int index = 0;
    while (index < binaryDoc.getStyleSheet().numStyles()) {
        final StyleDescription style = binaryDoc.getStyleSheet().getStyleDescription(index);
        // Slots without a style description are silently skipped.
        if (style != null) {
            final String header = "=== Style #" + index + " '" + style.getName() + "' ===";
            System.out.println(header);
            System.out.println(style);

            // Paragraph SPRMs are read from offset 2, character SPRMs from offset 0.
            if (style.getPAPX() != null) {
                dumpSprms(new SprmIterator(style.getPAPX(), 2), "Style's PAP SPRM: ");
            }
            if (style.getCHPX() != null) {
                dumpSprms(new SprmIterator(style.getCHPX(), 0), "Style's CHP SPRM: ");
            }
        }
        index++;
    }
}
Aggregations